.. _`bio/bwa-memx/mem`: BWA-MEMX ======== .. image:: https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/bio/bwa-memx/mem?label=version%20update%20pull%20requests :target: https://github.com/snakemake/snakemake-wrappers/pulls?q=is%3Apr+is%3Aopen+label%3Abio/bwa-memx/mem Collection of bwa-mem, bwa-mem2 and bwa-meme. Example ------- This wrapper can be used in the following way: .. code-block:: python rule bwa_memx_mem: input: reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], reference="genome.fasta", idx=multiext( "genome.fasta", ".amb", ".ann", ".bwt", ".pac", ".sa", ), output: "mapped/mem/{sample}.cram", # Output can be .cram, .bam, or .sam log: "logs/bwa_memx/{sample}.log", params: bwa="bwa-mem", # Can be 'bwa-mem, bwa-mem2 or bwa-meme. extra=r"-R '@RG\tID:{sample}\tSM:{sample}' -M", sort="samtools", # Can be 'none' or 'samtools or picard'. sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. sort_extra="", # Extra args for samtools. dedup="mark", # Can be 'none' (default), 'mark' or 'remove'. dedup_extra="-M", # Extra args for samblaster. exceed_thread_limit=True, # Set threads als for samtools sort / view (total used CPU may exceed threads!) embed_ref=True, # Embed reference when writing cram. threads: 8 wrapper: "v3.0.1/bio/bwa-memx/mem" rule bwa_memx_mem2: input: reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], reference="genome.fasta", idx=multiext( "genome.fasta", ".0123", ".amb", ".ann", ".bwt.2bit.64", ".pac", ), output: "mapped/mem2/{sample}.cram", log: "logs/bwa_memx/{sample}.log", params: bwa="bwa-mem2", extra=r"-R '@RG\tID:{sample}\tSM:{sample}' -M", sort="picard", sort_order="queryname", sort_extra="", dedup="none", dedup_extra="-M", exceed_thread_limit=True, embed_ref=True, threads: 8 wrapper: "v3.0.1/bio/bwa-memx/mem" rule bwa_memx_meme: input: reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], reference="genome.fasta", idx=multiext( "genome.fasta", ".0123", ".amb", ".ann", ".pac", ".pos_packed", ".suffixarray_uint64", ".suffixarray_uint64_L0_PARAMETERS", ".suffixarray_uint64_L1_PARAMETERS", ".suffixarray_uint64_L2_PARAMETERS", ), output: "mapped/meme/{sample}.cram", log: "logs/bwa_memx/{sample}.log", params: bwa="bwa-meme", extra=r"-R '@RG\tID:{sample}\tSM:{sample}' -M", sort="picard", sort_order="coordinate", sort_extra="", dedup="remove", dedup_extra="-M", exceed_thread_limit=False, embed_ref=False, threads: 8 wrapper: "v3.0.1/bio/bwa-memx/mem" Note that input, output and log file paths can be chosen freely. When running with .. code-block:: bash snakemake --use-conda the software dependencies will be automatically deployed into an isolated environment before execution. Software dependencies --------------------- * ``bwa=0.7.17`` * ``bwa-mem2=2.2.1`` * ``bwa-meme=1.0.6`` * ``samtools=1.18`` * ``samblaster=0.1.26`` * ``mbuffer=20160228`` * ``picard-slim=3.1.1`` Authors ------- * Christopher Schröder * Johannes Köster * Julian de Ruiter Code ---- .. code-block:: python __author__ = "Christopher Schröder, Johannes Köster, Julian de Ruiter" __copyright__ = ( "Copyright 2020, Christopher Schröder, Johannes Köster and Julian de Ruiter" ) __email__ = "christopher.schroeder@tu-dortmund.de koester@jimmy.harvard.edu, julianderuiter@gmail.com" __license__ = "MIT" from os import path from snakemake.shell import shell # Extract arguments. extra = snakemake.params.get("extra", "") sort = snakemake.params.get("sort", "none") sort_order = snakemake.params.get("sort_order", "coordinate") sort_extra = snakemake.params.get("sort_extra", "") embed_ref = snakemake.params.get("embed_ref", False) bwa = snakemake.params.get("bwa", "bwa-mem") # Option to set the threads of samtools sort and view to the snakemake limit. # In theory, bwa and alternate and samtools view starts only when sort is # finished, so that never more threads are used than the limit. But it can # not always be guaranteed. exceed_thread_limit = snakemake.params.get("exceed_thread_limit", False) dedup = snakemake.params.get("dedup", "none") dedup_extra = snakemake.params.get("dedup_extra", "") # Detect output format. if snakemake.output[0].endswith(".sam"): output_format = "cram" elif snakemake.output[0].endswith(".bam"): output_format = "bam" elif snakemake.output[0].endswith(".cram"): output_format = "cram" else: raise ValueError("output file format must be .sam, .bam or .cram") if embed_ref: output_format += ",embed_ref" if exceed_thread_limit: samtools_threads = snakemake.threads else: samtools_threads = 1 reference = snakemake.input.get("reference") log = snakemake.log_fmt_shell(stdout=False, stderr=True) # Check inputs/arguments. if not isinstance(snakemake.input.reads, str) and len(snakemake.input.reads) not in { 1, 2, }: raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") if sort_order not in {"coordinate", "queryname"}: raise ValueError("Unexpected value for sort_order ({})".format(sort_order)) # Determine which pipe command to use for converting to bam or sorting. if sort == "none": # Simply convert to bam using samtools view. pipe_cmd = "samtools view -h -O {output_format} -o {snakemake.output[0]} -T {reference} -@ {samtools_threads} -" elif sort == "samtools": pipe_cmd = "samtools sort {sort_extra} -O {output_format} -o {snakemake.output[0]} --reference {reference} -@ {samtools_threads} -" # Add name flag if needed. if sort_order == "queryname": sort_extra += " -n" prefix = path.splitext(snakemake.output[0])[0] sort_extra += " -T " + prefix + ".tmp" # Sort alignments using samtools sort. elif sort == "picard": # Sort alignments using picard SortSam. pipe_cmd = ( "picard SortSam {sort_extra} -I /dev/stdin" " -O /dev/stdout -SO {sort_order} | samtools view -h -O {output_format} -o {snakemake.output[0]} -T {reference} -@ {samtools_threads} -" ) else: raise ValueError("Unexpected value for params.sort ({})".format(sort)) # Determine which pipe command to use for converting to bam or sorting. if dedup == "none": # Do not detect duplicates. dedup_cmd = "" elif dedup == "mark": # Mark duplicates using samblaster. dedup_cmd = "samblaster -q {dedup_extra} | " elif dedup == "remove": dedup_cmd = "samblaster -q -r {dedup_extra} | " else: raise ValueError("Unexpected value for params.dedup ({})".format(dedup)) if bwa == "bwa-mem": bwa_cmd = "bwa mem" elif bwa == "bwa-mem2": bwa_cmd = "bwa-mem2 mem" elif bwa == "bwa-meme": bwa_cmd = "bwa-meme mem -7" else: raise ValueError( "Unexpected value for params.bwa ({}). Must be bwa-mem, bwa-mem2 or bwa-meme.".format( bwa ) ) shell( " ({bwa_cmd}" " -t {snakemake.threads}" " {extra}" " {reference}" " {snakemake.input.reads}" " | mbuffer -q -m 2G " " | " + dedup_cmd + pipe_cmd + ") {log}" ) .. |nl| raw:: html