GATK VARIANTRECALIBRATOR
Run gatk VariantRecalibrator.
URL: https://gatk.broadinstitute.org/hc/en-us/articles/9570466678811-VariantRecalibrator
Example
This wrapper can be used in the following way:
from snakemake.remote import HTTP
https = HTTP.RemoteProvider(allow_redirects=True)
rule haplotype_caller:
input:
vcf=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/phase1.projectConsensus.chr20.1M-10M.raw.snps.vcf"
),
ref=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/human_g1k_v37.20.21.fasta"
),
fai=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/human_g1k_v37.20.21.fasta.fai"
),
dict=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/human_g1k_v37.20.21.dict"
),
mills=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.20.1M-10M.vcf"
),
mills_idx=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.20.1M-10M.vcf.idx"
),
omni=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/Omni25_sites_1525_samples.b37.20.1M-10M.vcf"
),
omni_idx=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/Omni25_sites_1525_samples.b37.20.1M-10M.vcf.idx"
),
g1k=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/combined.phase1.chr20.raw.indels.filtered.sites.1M-10M.vcf"
),
g1k_idx=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/combined.phase1.chr20.raw.indels.filtered.sites.1M-10M.vcf.idx"
),
dbsnp=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/dbsnp_132_b37.leftAligned.20.1M-10M.vcf"
),
dbsnp_idx=https.remote(
"github.com/broadinstitute/gatk/raw/4.2.5.0/src/test/resources/large/VQSR/dbsnp_132_b37.leftAligned.20.1M-10M.vcf.idx"
),
output:
vcf="calls/all.recal.vcf",
idx="calls/all.recal.vcf.idx",
tranches="calls/all.tranches",
log:
"logs/gatk/variantrecalibrator.log",
params:
mode="SNP", # set mode, must be either SNP, INDEL or BOTH
resources={
"mills": {"known": False, "training": True, "truth": True, "prior": 15.0},
"omni": {"known": False, "training": True, "truth": False, "prior": 12.0},
"g1k": {"known": False, "training": True, "truth": False, "prior": 10.0},
"dbsnp": {"known": True, "training": False, "truth": False, "prior": 2.0},
},
annotation=["MQ", "QD", "SB"],
extra="--max-gaussians 2", # optional
threads: 1
resources:
mem_mb=1024,
wrapper:
"bio/gatk/variantrecalibrator"
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Notes
The java_opts param allows for additional arguments to be passed to the java compiler, e.g. “-XX:ParallelGCThreads=10” (not for -XmX or -Djava.io.tmpdir, since they are handled automatically).
The extra param allows for additional program arguments.
Software dependencies
gatk4=4.6.1.0
snakemake-wrapper-utils=0.6.2
google-cloud-sdk
google-crc32c
Input/Output
Input:
VCF file
Output:
.recal file
.tranches file
Code
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2018, Johannes Köster"
__email__ = "johannes.koester@protonmail.com"
__license__ = "MIT"
import os
import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts
extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
def fmt_res(resname, resparams):
fmt_bool = lambda b: str(b).lower()
try:
f = snakemake.input.get(resname)
except KeyError:
raise RuntimeError(
f"There must be a named input file for every resource (missing: {resname})"
)
return "{},known={},training={},truth={},prior={} {}".format(
resname,
fmt_bool(resparams["known"]),
fmt_bool(resparams["training"]),
fmt_bool(resparams["truth"]),
resparams["prior"],
f,
)
annotation_resources = [
"--resource:{}".format(fmt_res(resname, resparams))
for resname, resparams in snakemake.params["resources"].items()
]
annotation = list(map("-an {}".format, snakemake.params.annotation))
tranches = snakemake.output.get("tranches", "")
if tranches:
tranches = f"--tranches-file {tranches}"
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' VariantRecalibrator"
" --variant {snakemake.input.vcf}"
" --reference {snakemake.input.ref}"
" --mode {snakemake.params.mode}"
" {annotation_resources}"
" {tranches}"
" {annotation}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.vcf}"
" {log}"
)