STRELKA GERMLINE¶
Call germline variants with Strelka.
Example¶
This wrapper can be used in the following way:
rule strelka_germline:
input:
# the required bam file
bam="mapped/{sample}.bam",
# path to reference genome fasta and index
fasta="genome.fasta",
fasta_index="genome.fasta.fai",
output:
# Strelka results - either use directory or complete file path
variants="strelka/{sample}.vcf.gz",
variants_index="strelka/{sample}.vcf.gz.tbi",
sample_genomes=["strelka/{sample}.genome.vcf.gz"],
sample_genomes_indices=["strelka/{sample}.genome.vcf.gz.tbi"],
log:
"logs/strelka/germline/{sample}.log",
params:
# optional parameters
config_extra="",
run_extra="",
threads: 8
wrapper:
"v2.6.0/bio/strelka/germline"
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Software dependencies¶
strelka=2.9.10
Authors¶
- Jan Forster
- Christopher Schröder
Code¶
__author__ = "Jan Forster, Christopher Schröder"
__copyright__ = "Copyright 2019, Jan Forster"
__email__ = "jan.forster@uk-essen.de"
__license__ = "MIT"
import tempfile
import glob
from pathlib import Path
from snakemake.shell import shell
config_extra = snakemake.params.get("config_extra", "")
run_extra = snakemake.params.get("run_extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
bam = snakemake.input.get("bam") # input bam file, required
assert bam is not None, "input-> bam is a required input parameter"
if isinstance(bam, str):
bam = [bam]
if snakemake.output.get("sample_genomes"):
assert len(bam) == len(
snakemake.output.get("sample_genomes")
), "number of input bams and sample_genomes must be equal "
if snakemake.output.get("sample_genomes_indices"):
assert len(bam) == len(
snakemake.output.get("sample_genomes_indices")
), "number of input bams and sample_genomes_indices must be equal "
bam_input = " ".join(f"--bam {b}" for b in bam)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"(configureStrelkaGermlineWorkflow.py " # configure the strelka run
"{bam_input} " # input bam
"--referenceFasta {snakemake.input.fasta} " # reference genome
"--runDir {tmpdir} " # output directory
"{config_extra} " # additional parameters for the configuration
"&& {tmpdir}/runWorkflow.py " # run the strelka workflow
"-m local " # run in local mode
"-j {snakemake.threads} " # number of threads
"{run_extra}) " # additional parameters for the run
"{log}"
) # logging
if snakemake.output.get("variants"):
shell(
"cat {tmpdir}/results/variants/variants.vcf.gz > {snakemake.output.variants:q}"
)
if snakemake.output.get("variants_index"):
shell(
"cat {tmpdir}/results/variants/variants.vcf.gz.tbi > {snakemake.output.variants_index:q}"
)
if targets := snakemake.output.get("sample_genomes"):
origins = glob.glob(f"{tmpdir}/results/variants/genome.S*.vcf.gz")
assert len(origins) == len(targets)
for origin, target in zip(origins, targets):
shell(f"cat {origin} > {target}")
if targets := snakemake.output.get("sample_genomes_indices"):
origins = glob.glob(f"{tmpdir}/results/variants/genome.S*.vcf.gz.tbi")
assert len(origins) == len(targets)
for origin, target in zip(origins, targets):
shell(f"cat {origin} > {target}")