VERIFYBAMID2
Run verifybamid2.
Example
This wrapper can be used in the following way:
rule verify_bam_id:
input:
bam="a.bam",
ref="genome.fasta",
# optional - this can be used to specify custom resource files if
# necessary (if using GRCh37 or GRCh38 instead simply specify
# params.genome_build="38", for example)
# N.B. if svd_mu={prefix}.mu, then {prefix}.bed, {prefix}.UD, and
# {prefix}.V must also exist
svd_mu="ref.vcf.mu",
output:
selfsm="a.selfSM",
ancestry="a.ancestry",
params:
# optional - see note for input.svd_mu
# current choices are {37,38}
# genome_build="38",
log:
"logs/verifybamid2/a.log",
wrapper:
"v4.6.0-24-g250dd3e/bio/verifybamid/verifybamid2"
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Notes
The extra param allows for additional program arguments.
For more information see, https://github.com/Griffan/VerifyBamID
Software dependencies
verifybamid2=2.0.1
Input/Output
Input:
bam file
Output:
estimated intraspacies contamination
Code
__author__ = "Brett Copeland"
__copyright__ = "Copyright 2021, Brett Copeland"
__email__ = "brcopeland@ucsd.edu"
__license__ = "MIT"
import os
from tempfile import TemporaryDirectory
from shutil import copyfile
from snakemake.shell import shell
extra = snakemake.params.get("extra", "")
svd_mu = snakemake.input.get("svd_mu", "")
if svd_mu:
svd_prefix = os.path.splitext(svd_mu)[0]
for suffix in ("bed", "UD", "V"):
fn = f"{svd_prefix}.{suffix}"
if not os.path.isfile(fn):
raise Exception(f"Failed to find required input {fn}.")
else:
genome_build = snakemake.params.get("genome_build", "38")
if genome_build not in ("37", "38"):
raise Exception(
f"No svd_prefix given and improper {genome_build=} "
f"given. Valid choices are 37,38."
)
verifybamid2_found = False
for path in os.getenv("PATH").split(os.path.pathsep):
path_to_verifybamid2 = os.path.join(path, "verifybamid2")
if os.path.isfile(path_to_verifybamid2):
verifybamid2_found = True
resources_directory = os.path.join(
os.path.dirname(os.path.realpath(path_to_verifybamid2)), "resource"
)
svd_prefix = os.path.join(
resources_directory, f"1000g.phase3.100k.b{genome_build}.vcf.gz.dat"
)
break
if not verifybamid2_found:
raise Exception("Failed to find verifybamid2 location.")
def move_file(src, dst):
"this function will move `fn` while respecting ACLs in the target directory"
copyfile(src, dst)
os.remove(src)
# verifybamid2 outputs results to result.selfSM and result.Ancestry in the working directory,
# so to avoid collisions we have to run it from a temporary directory and fix the paths
# to inputs, outputs, and the log file
ref_path = os.path.abspath(snakemake.input.ref)
svd_prefix = os.path.abspath(svd_prefix)
bam_path = os.path.abspath(snakemake.input.bam)
selfsm_path = os.path.abspath(snakemake.output.selfsm)
ancestry_path = os.path.abspath(snakemake.output.ancestry)
if snakemake.log:
snakemake.log[0] = os.path.abspath(snakemake.log[0])
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
with TemporaryDirectory() as tmp_dir:
os.chdir(tmp_dir)
shell(
"verifybamid2 --SVDPrefix {svd_prefix} "
"--Reference {ref_path} --BamFile {bam_path} {extra} "
"--NumThread {snakemake.threads} {log}"
)
move_file("result.selfSM", selfsm_path)
move_file("result.Ancestry", ancestry_path)