LIBARCHIVE - COMPRESS
A simple tool to compress multiple files into commonly used archives (.zip, .7z, .tar, etc.).
URL: https://github.com/libarchive/libarchive
Example
This wrapper can be used in the following way:
# Example: compress a single input file into a 7zip archive.
rule compress_single_file_to_7zip:
input:
"a.txt",
output:
"results/test.7z",
log:
"logs/libarchive/compress/compress_single_file_to_7zip.log",
threads: 1
params:
format_name="7zip",
wrapper:
"v9.8.0/utils/libarchive/compress"
# Example: pack two files into a gzip-filtered pax archive, storing each one
# under a custom path inside the archive (one internal path per input, same order).
rule compress_multiple_files_to_tar_gz:
input:
"a.txt",
"b.md",
output:
"results/test.tar.gz",
log:
"logs/libarchive/compress/compress_multiple_files_to_tar_gz.log",
threads: 1
params:
internal_paths=["text/textfile_a.txt", "markdown/markdown_b.md"],
format_name="pax",
filter_name="gzip",
wrapper:
"v9.8.0/utils/libarchive/compress"
# Example: create a passphrase-protected zip; the "extra" dictionary is passed
# on as additional keyword arguments to the archive writer.
# NOTE: the passphrase below is a placeholder written in plain text; do not
# hard-code real secrets in a workflow file.
rule compress_single_file_to_zip_with_password:
input:
"a.txt",
output:
"results/test.zip",
log:
"logs/libarchive/compress/compress_single_file_to_zip_with_password.log",
threads: 1
params:
format_name="zip",
extra={"passphrase": "t0ps3cr3t", "options": "zip:encryption=aes256"},
wrapper:
"v9.8.0/utils/libarchive/compress"
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Software dependencies
python-libarchive-c=5.3
Input/Output
Input:
Paths of the files to compress.
Output:
Location of the compressed archive (.zip, .7z, .tar, etc.).
Params
format_name: Compression format (e.g., “7zip”, “pax”, “zip”). See the libarchive documentation for the full list.
filter_name: Optional. Compression filter to use (e.g., “gzip”). Options vary per format.
internal_paths: Optional. Destination path(s) for the files within the archive, given as relative POSIX-style paths (e.g., “dir/file.txt”). Must correspond one-to-one with the input files, in the same order. If omitted, all files are added at the archive root.
extra: Optional. Dictionary with additional keyword arguments for libarchive.file_writer().
Code
"""libarchive compression.
Enables archive compression for various formats (zip, 7zip, tar, etc) across platforms.
"""
__author__ = "Ivan Ruiz Manuel"
__copyright__ = "Copyright 2026, Ivan Ruiz Manuel"
__email__ = "i.ruizmanuel@tudelft.nl"
__license__ = "MIT"
import sys
from pathlib import Path
import re
import libarchive
# Matches a leading Windows drive specifier (one ASCII letter followed by ":"),
# e.g. "C:"; used to reject drive-qualified archive-internal paths.
_WINDOWS_DRIVE_RE = re.compile(r"^[A-Za-z]:")
def _listify(x: str | list | None) -> list | None:
if isinstance(x, str):
x = [x]
return x
def _validate_input_file(path: Path) -> None:
    """Ensure ``path`` points at an existing regular file.

    Raises:
        ValueError: If ``path`` is missing or is not a regular file
            (for example, a directory).
    """
    if path.is_file():
        return
    raise ValueError(f"Input file {path!r} is invalid.")
def _validate_internal_path(path: str) -> None:
    """Check that ``path`` is a safe, unambiguous archive member name.

    Only relative POSIX-style file paths are accepted, for example:

    - "foo.txt"
    - "dir/foo.txt"

    Rejected are: empty paths, paths starting or ending with "/", paths
    containing a backslash, paths starting with a Windows drive specifier,
    and paths with an empty, "." or ".." component.

    Raises:
        ValueError: If ``path`` breaks any of the rules above.
    """
    # Whole-string checks first; they short-circuit in this exact order.
    is_bad = (
        not path
        or path.startswith("/")
        or path.endswith("/")
        or "\\" in path
        or _WINDOWS_DRIVE_RE.match(path)
    )
    if not is_bad:
        # Component-level check: no empty, current-dir or parent-dir segments.
        is_bad = not {"", ".", ".."}.isdisjoint(path.split("/"))
    if is_bad:
        raise ValueError(f"Internal path {path!r} is invalid.")
def compress(
archive_path: Path | str,
input_files: list[Path | str],
format_name: str,
filter_name: str | None = None,
internal_paths: list[str] | None = None,
**kwargs,
) -> None:
"""Create an archive containing the given files.
Args:
archive_path: Path where the archive should be created.
input_files: Files to add to the archive.
format_name: libarchive format name ("zip", "pax", "7zip", "gnutar", "ustar", etc).
filter_name: Optional libarchive compression filter name ("gzip", "xz", "zstd", etc).
internal_paths: Optional archive-internal paths for the input files.
If provided, must have the same length as input_files.
"""
archive_path = Path(archive_path)
input_files = [Path(path) for path in input_files]
if internal_paths is None:
internal_paths = [path.name for path in input_files]
if len(input_files) != len(internal_paths):
raise ValueError("Input files and internal paths must have the same length.")
if len(set(internal_paths)) != len(internal_paths):
raise ValueError("Internal paths must not contain duplicates.")
created = False
try:
with libarchive.file_writer(
str(archive_path),
format_name,
filter_name=filter_name,
**kwargs,
) as archive:
created = True
for input_path, archive_name in zip(input_files, internal_paths):
_validate_input_file(input_path)
_validate_internal_path(archive_name)
archive.add_file(
str(input_path),
pathname=archive_name,
)
except Exception:
if created:
archive_path.unlink(missing_ok=True)
raise
def main():
    """Entry point: read the rule's settings from ``snakemake`` and compress."""
    # Send everything written to stderr into the rule's log file (line-buffered).
    sys.stderr = open(snakemake.log[0], "w", buffering=1)

    # Gather the fixed arguments first, in the same order they are passed on.
    call_args = {
        "archive_path": snakemake.output[0],
        "input_files": _listify(snakemake.input),
        "format_name": snakemake.params.format_name,
        "filter_name": snakemake.params.get("filter_name", None),
        "internal_paths": _listify(snakemake.params.get("internal_paths", None)),
    }
    # User-supplied extras are unpacked separately so a key that clashes with
    # a fixed argument still raises TypeError instead of silently overriding it.
    extra_options = snakemake.params.get("extra", {})
    compress(**call_args, **extra_options)
# Run the wrapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()