LIBARCHIVE - COMPRESS

https://img.shields.io/badge/wrapper_version-v9.8.0-10785b https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/utils/libarchive/compress?label=version%20update%20pull%20requests&color=1cb481

A simple tool to compress one or more files into commonly used archive formats (.zip, .7z, .tar, etc.).

URL: https://github.com/libarchive/libarchive

Example

This wrapper can be used in the following way:

# Pack one file into a 7-Zip archive. No internal_paths are given, so the file
# is stored under its base name at the archive root.
rule compress_single_file_to_7zip:
    input:
        "a.txt",
    output:
        "results/test.7z",
    log:
        "logs/libarchive/compress/compress_single_file_to_7zip.log",
    threads: 1
    params:
        format_name="7zip",
    wrapper:
        "v9.8.0/utils/libarchive/compress"


# Pack two files into a gzip-compressed tar archive (pax format + gzip filter).
rule compress_multiple_files_to_tar_gz:
    input:
        "a.txt",
        "b.md",
    output:
        "results/test.tar.gz",
    log:
        "logs/libarchive/compress/compress_multiple_files_to_tar_gz.log",
    threads: 1
    params:
        # Paired with the inputs in order:
        # a.txt -> text/textfile_a.txt, b.md -> markdown/markdown_b.md
        internal_paths=["text/textfile_a.txt", "markdown/markdown_b.md"],
        format_name="pax",
        filter_name="gzip",
    wrapper:
        "v9.8.0/utils/libarchive/compress"


# Pack one file into a passphrase-protected zip archive.
rule compress_single_file_to_zip_with_password:
    input:
        "a.txt",
    output:
        "results/test.zip",
    log:
        "logs/libarchive/compress/compress_single_file_to_zip_with_password.log",
    threads: 1
    params:
        format_name="zip",
        # The entries of `extra` are passed as keyword arguments to
        # libarchive.file_writer().
        # NOTE: example only; the passphrase is written in plain text here.
        extra={"passphrase": "t0ps3cr3t", "options": "zip:encryption=aes256"},
    wrapper:
        "v9.8.0/utils/libarchive/compress"

Note that input, output and log file paths can be chosen freely.

When running with

snakemake --use-conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Software dependencies

  • python-libarchive-c=5.3

Input/Output

Input:

  • Paths of the files to compress.

Output:

  • Location of the compressed archive (.zip, .7z, .tar, etc.).

Params

  • format_name: Required. Archive format name as understood by libarchive (e.g., “7zip”, “pax”, “zip”). See the libarchive documentation for the full list.

  • filter_name: Optional. Compression filter to apply to the archive. Available options vary per format.

Common combinations:
  • For zip and 7zip: none.

  • For pax: gzip or xz (*.tar.gz and *.tar.xz, respectively).

  • internal_paths: Optional. Destination path(s) for the files within the archive. Must correspond one-to-one with the input files, in the same order. If omitted, each file is added at the archive root under its own file name.

  • extra: Optional. Dictionary with additional arguments for libarchive.file_writer().

Authors

  • Ivan Ruiz Manuel

Code

"""libarchive compression.

Enables archive compression for various formats (zip, 7zip, tar, etc) across platforms.
"""

__author__ = "Ivan Ruiz Manuel"
__copyright__ = "Copyright 2026, Ivan Ruiz Manuel"
__email__ = "i.ruizmanuel@tudelft.nl"
__license__ = "MIT"

import sys
from pathlib import Path
import re

import libarchive

# Matches a leading Windows drive specifier (one ASCII letter followed by a
# colon, e.g. "C:"); used to reject drive-qualified archive-internal paths.
_WINDOWS_DRIVE_RE = re.compile(r"^[A-Za-z]:")


def _listify(x: str | list | None) -> list | None:
    if isinstance(x, str):
        x = [x]
    return x


def _validate_input_file(path: Path) -> None:
    """Reject invalid input paths."""
    if not path.is_file():
        raise ValueError(f"Input file {path!r} is invalid.")


def _validate_internal_path(path: str) -> None:
    """Reject archive paths that would be unsafe or ambiguous.

    Valid paths are exclusively relative POSIX-style paths such as:
    - "foo.txt"
    - "dir/foo.txt"
    """
    if (
        not path
        or path.startswith("/")
        or path.endswith("/")
        or "\\" in path
        or _WINDOWS_DRIVE_RE.match(path)
    ):
        raise ValueError(f"Internal path {path!r} is invalid.")
    parts = path.split("/")
    if any(part in {"", ".", ".."} for part in parts):
        raise ValueError(f"Internal path {path!r} is invalid.")


def compress(
    archive_path: Path | str,
    input_files: list[Path | str],
    format_name: str,
    filter_name: str | None = None,
    internal_paths: list[str] | None = None,
    **kwargs,
) -> None:
    """Create an archive containing the given files.

    Args:
        archive_path: Path where the archive should be created.
        input_files: Files to add to the archive.
        format_name: libarchive format name ("zip", "pax", "7zip", "gnutar", "ustar", etc).
        filter_name: Optional libarchive compression filter name ("gzip", "xz", "zstd", etc).
        internal_paths: Optional archive-internal paths for the input files.
            If provided, must have the same length as input_files.
    """
    archive_path = Path(archive_path)
    input_files = [Path(path) for path in input_files]
    if internal_paths is None:
        internal_paths = [path.name for path in input_files]

    if len(input_files) != len(internal_paths):
        raise ValueError("Input files and internal paths must have the same length.")
    if len(set(internal_paths)) != len(internal_paths):
        raise ValueError("Internal paths must not contain duplicates.")

    created = False
    try:
        with libarchive.file_writer(
            str(archive_path),
            format_name,
            filter_name=filter_name,
            **kwargs,
        ) as archive:
            created = True
            for input_path, archive_name in zip(input_files, internal_paths):
                _validate_input_file(input_path)
                _validate_internal_path(archive_name)
                archive.add_file(
                    str(input_path),
                    pathname=archive_name,
                )
    except Exception:
        if created:
            archive_path.unlink(missing_ok=True)
        raise


def main():
    """Entry point: read the rule's settings and build the archive.

    Relies on a global ``snakemake`` object that is not defined in this file
    (expected to be supplied by Snakemake when the wrapper runs).
    """
    # Send everything written to stderr into the rule's first log file,
    # using line buffering.
    sys.stderr = open(snakemake.log[0], "w", buffering=1)

    params = snakemake.params
    archive_path = snakemake.output[0]
    input_files = _listify(snakemake.input)
    format_name = params.format_name
    filter_name = params.get("filter_name", None)
    internal_paths = _listify(params.get("internal_paths", None))
    extra = params.get("extra", {})

    compress(
        archive_path=archive_path,
        input_files=input_files,
        format_name=format_name,
        filter_name=filter_name,
        internal_paths=internal_paths,
        **extra,
    )


# Execute the wrapper only when this file is run as a script.
if __name__ == "__main__":
    main()