BWA-MEM2 INDEX

https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/bio/bwa-memx/index?label=version%20update%20pull%20requests

Creates a bwa-mem, bwa-mem2 or bwa-meme index.

Example

This wrapper can be used in the following way:

rule bwa_mem_index:
    input:
        "{genome}",
    output:
        multiext(
            "{genome}",
            ".amb",
            ".ann",
            ".bwt",
            ".pac",
            ".sa",
        ),
    log:
        "logs/bwa-mem_index/{genome}.log",
    params:
        bwa="bwa-mem",
    threads: 8
    wrapper:
        "v4.6.0-24-g250dd3e/bio/bwa-memx/index"


rule bwa_mem2_index:
    input:
        "{genome}",
    output:
        multiext(
            "{genome}",
            ".0123",
            ".amb",
            ".ann",
            ".bwt.2bit.64",
            ".pac",
        ),
    log:
        "logs/bwa-mem2_index/{genome}.log",
    params:
        bwa="bwa-mem2",
    threads: 8
    wrapper:
        "v4.6.0-24-g250dd3e/bio/bwa-memx/index"


rule bwa_meme_index:
    input:
        "{genome}",
    output:
        multiext(
            "{genome}",
            ".0123",
            ".amb",
            ".ann",
            ".pac",
            ".pos_packed",
            ".suffixarray_uint64",
            ".suffixarray_uint64_L0_PARAMETERS",
            ".suffixarray_uint64_L1_PARAMETERS",
            ".suffixarray_uint64_L2_PARAMETERS",
        ),
    log:
        "logs/bwa-meme_index/{genome}.log",
    params:
        bwa="bwa-meme",
    threads: 8
    wrapper:
        "v4.6.0-24-g250dd3e/bio/bwa-memx/index"

Note that input, output and log file paths can be chosen freely.

When running with

snakemake --use-conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Software dependencies

  • bwa=0.7.18

  • bwa-mem2=2.2.1

  • bwa-meme=1.0.6

Authors

  • Christopher Schröder

  • Patrik Smeds

Code

__author__ = "Christopher Schröder, Patrik Smeds"
__copyright__ = "Copyright 2022, Christopher Schröder, Patrik Smeds"
__email__ = "christopher.schroeder@tu-dortmund.de, patrik.smeds@gmail.com"
__license__ = "MIT"

from os import path

from snakemake.shell import shell

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

bwa = snakemake.params.get("bwa", "bwa-mem")

# Check inputs/arguments.
if len(snakemake.input) == 0:
    raise ValueError("A reference genome has to be provided.")
elif len(snakemake.input) > 1:
    raise ValueError("Please provide exactly one reference genome as input.")

if bwa == "bwa-mem":
    valid_suffixes = {
        ".amb",
        ".ann",
        ".bwt",
        ".pac",
        ".sa",
    }
    cmd = "bwa index {prefix} {snakemake.input[0]}"
elif bwa == "bwa-mem2":
    valid_suffixes = {
        ".0123",
        ".amb",
        ".ann",
        ".bwt.2bit.64",
        ".pac",
    }
    cmd = "bwa-mem2 index -p {prefix} {snakemake.input[0]}"
elif bwa == "bwa-meme":
    valid_suffixes = {
        ".0123",
        ".amb",
        ".ann",
        ".pac",
        ".pos_packed",
        ".suffixarray_uint64",
        ".suffixarray_uint64_L0_PARAMETERS",
        ".suffixarray_uint64_L1_PARAMETERS",
        ".suffixarray_uint64_L2_PARAMETERS",
    }

    cmd = "bwa-meme index -a meme -p {prefix} {snakemake.input[0]} -t {snakemake.threads} && bwa-meme-train-prmi -t {snakemake.threads} --data-path {dirname} {suffixarray} {basename} pwl,linear,linear_spline {num_models}"
else:
    raise ValueError(
        "Unexpected value for params.bwa ({}). Must be bwa-mem, bwa-mem2 or bwa-meme.".format(
            bwa
        )
    )


def get_valid_suffix(path):
    for suffix in valid_suffixes:
        if path.endswith(suffix):
            return suffix


prefixes = set()
for s in snakemake.output:
    suffix = get_valid_suffix(s)
    if suffix is None:
        raise ValueError(f"{s} cannot be generated by bwa-meme index (invalid suffix).")
    prefixes.add(s[: -len(suffix)])

if len(prefixes) != 1:
    raise ValueError("Output files must share common prefix up to their file endings.")
(prefix,) = prefixes

suffixarray = snakemake.input[0] + ".suffixarray_uint64"
dirname = path.dirname(suffixarray)
basename = path.basename(suffixarray)
num_models = snakemake.params.get("num_models", 268435456)  # change only for testing!

if not dirname:
    dirname = "."

shell(f"({cmd}) {log}")