BOWTIE2
Map reads with bowtie2.
URL: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
Example
This wrapper can be used in the following way:
rule test_bowtie2:
input:
sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
idx=multiext(
"index/genome",
".1.bt2",
".2.bt2",
".3.bt2",
".4.bt2",
".rev.1.bt2",
".rev.2.bt2",
),
# ref="genome.fasta", #Required for CRAM output
output:
"mapped/{sample}.bam",
# idx="",
# metrics="",
# unaligned="",
# unpaired="",
# unconcordant="",
# concordant="",
log:
"logs/bowtie2/{sample}.log",
params:
extra="", # optional parameters
threads: 8 # Use at least two threads
wrapper:
"v4.6.0-24-g250dd3e/bio/bowtie2/align"
use rule test_bowtie2 as test_bowtie2_se_gz with:
input:
sample=["reads/{sample}.1.fastq.gz"],
idx=multiext(
"index/genome",
".1.bt2",
".2.bt2",
".3.bt2",
".4.bt2",
".rev.1.bt2",
".rev.2.bt2",
),
output:
"mapped_se_gz/{sample}.bam",
rule test_bowtie2_index:
input:
sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
idx=multiext(
"index/genome",
".1.bt2",
".2.bt2",
".3.bt2",
".4.bt2",
".rev.1.bt2",
".rev.2.bt2",
),
output:
"mapped_idx/{sample}.bam",
idx="mapped_idx/{sample}.bam.bai",
metrics="mapped_idx/{sample}.metrics.txt",
unaligned="mapped_idx/{sample}.unaligned.sam",
unpaired="mapped_idx/{sample}.unpaired.sam",
# unconcordant="",
# concordant="",
log:
"logs/bowtie2/{sample}.log",
params:
extra="", # optional parameters
threads: 8 # Use at least two threads
wrapper:
"v4.6.0-24-g250dd3e/bio/bowtie2/align"
rule test_bowtie2_cram:
input:
sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
idx=multiext(
"index/genome",
".1.bt2",
".2.bt2",
".3.bt2",
".4.bt2",
".rev.1.bt2",
".rev.2.bt2",
),
ref="genome.fasta",
output:
"mapped_idx/{sample}.cram",
# idx="",
# metrics="",
# unaligned="",
# unpaired="",
# unconcordant="",
# concordant="",
log:
"logs/bowtie2/{sample}.log",
params:
extra="", # optional parameters
threads: 8 # Use at least two threads
wrapper:
"v4.6.0-24-g250dd3e/bio/bowtie2/align"
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Notes
This wrapper uses an inner pipe. Make sure to use at least two threads in your Snakefile.
Software dependencies
bowtie2=2.5.4
samtools=1.21
snakemake-wrapper-utils=0.6.2
Input/Output
Input:
sample
: FASTQ file(s)idx
: Bowtie2 indexed reference indexref
: Optional path to genome sequence (FASTA)ref_fai
: Optional path to reference genome sequence index (FAI)
Output:
SAM/BAM/CRAM file. This must be the first output file in the output file list.
idx
: Optional path to bam index.metrics
: Optional path to metrics file.unaligned
: Optional path to unaligned unpaired reads.unpaired
: Optional path to unpaired reads that aligned at least once.unconcordant
: Optional path to pairs that didn’t align concordantly.concordant
: Optional path to pairs that aligned concordantly at least once.
Params
extra
: additional program arguments (except for -x, -U, -1, -2, –interleaved, -b, –met-file, –un, –al, –un-conc, –al-conc, -f, –tab6, –tab5, -q, or -p/–threads)interleaved
: Input sample contains interleaved paired-end FASTQ/FASTA reads. False`(default) or `True.
Code
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"
import os
from snakemake.shell import shell
from snakemake_wrapper_utils.samtools import get_samtools_opts
def get_format(path: str) -> str:
"""
Return file format since Bowtie2 reads files that
could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2).
"""
if path.endswith((".gz", ".bz2")):
return path.split(".")[-2].lower()
return path.split(".")[-1].lower()
bowtie2_threads = snakemake.threads - 1
if bowtie2_threads < 1:
raise ValueError(
f"This wrapper expected at least two threads, got {snakemake.threads}"
)
# Setting parse_threads to false since samtools performs only
# bam compression. Thus the wrapper would use *twice* the amount
# of threads reserved by user otherwise.
samtools_opts = get_samtools_opts(snakemake, parse_threads=False)
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
n = len(snakemake.input.sample)
assert (
n == 1 or n == 2
), "input->sample must have 1 (single-end) or 2 (paired-end) elements."
reads = ""
if n == 1:
if get_format(snakemake.input.sample[0]) in ("bam", "sam"):
reads = f"-b {snakemake.input.sample}"
else:
if snakemake.params.get("interleaved", False):
reads = f"--interleaved {snakemake.input.sample}"
else:
reads = f"-U {snakemake.input.sample}"
else:
reads = "-1 {} -2 {}".format(*snakemake.input.sample)
if all(get_format(sample) in ("fastq", "fq") for sample in snakemake.input.sample):
extra += " -q "
elif all(get_format(sample) == "tab5" for sample in snakemake.input.sample):
extra += " --tab5 "
elif all(get_format(sample) == "tab6" for sample in snakemake.input.sample):
extra += " --tab6 "
elif all(
get_format(sample) in ("fa", "mfa", "fasta") for sample in snakemake.input.sample
):
extra += " -f "
metrics = snakemake.output.get("metrics")
if metrics:
extra += f" --met-file {metrics} "
unaligned = snakemake.output.get("unaligned")
if unaligned:
extra += f" --un {unaligned} "
unpaired = snakemake.output.get("unpaired")
if unpaired:
extra += f" --al {unpaired} "
unconcordant = snakemake.output.get("unconcordant")
if unconcordant:
extra += f" --un-conc {unconcordant} "
concordant = snakemake.output.get("concordant")
if concordant:
extra += f" --al-conc {concordant} "
index = os.path.commonprefix(snakemake.input.idx).rstrip(".")
shell(
"(bowtie2"
" --threads {bowtie2_threads}"
" {reads} "
" -x {index}"
" {extra}"
"| samtools view --with-header "
" {samtools_opts}"
" -"
") {log}"
)