PINDEL2VCF

Convert pindel output to VCF.

Software dependencies

  • pindel ==0.2.5b8

Example

This wrapper can be used in the following way:

# Convert a single pindel output file (one variant type per job) to VCF.
rule pindel2vcf:
    input:
        ref="genome.fasta",
        pindel="pindel/all_{type}"
    output:
        "pindel/all_{type}.vcf"
    params:
        refname="hg38",  # mandatory, passed to pindel2vcf as -R; see pindel manual
        refdate="20170110",  # mandatory, passed to pindel2vcf as -d; see pindel manual
        extra=""  # extra params (except -r, -p, -R, -d, -v, which the wrapper sets itself)
    log:
        "logs/pindel/pindel2vcf.{type}.log"
    wrapper:
        "0.67.0/bio/pindel/pindel2vcf"

# Convert several pindel output files into one VCF. All files must share the
# same path prefix (here "pindel/all"); the wrapper passes that prefix to
# pindel2vcf with -P instead of -p.
rule pindel2vcf_multi_input:
    input:
        ref="genome.fasta",
        pindel=["pindel/all_D", "pindel/all_INV"]
    output:
        "pindel/all.vcf"
    params:
        refname="hg38",  # mandatory, passed to pindel2vcf as -R; see pindel manual
        refdate="20170110",  # mandatory, passed to pindel2vcf as -d; see pindel manual
        extra=""  # extra params (except -r, -p, -P, -R, -d, -v, which the wrapper sets itself)
    log:
        "logs/pindel/pindel2vcf.log"
    wrapper:
        "0.67.0/bio/pindel/pindel2vcf"

Note that input, output and log file paths can be chosen freely. When running with

snakemake --use-conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Authors

  • Johannes Köster
  • Patrik Smeds

Code

__author__ = "Johannes Köster, Patrik Smeds"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"

import os
import tempfile
from snakemake.shell import shell

# Shell redirection snippet for stdout and stderr, built from the rule's log directive.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
# Optional additional command line arguments; empty string when the rule sets none.
extra = snakemake.params.get("extra", "")

# File name suffixes (variant types) that pindel writes after the common
# output prefix, e.g. "all_D" or "all_INV_final". Input files whose suffix is
# not listed here are rejected.
expected_endings = [
    "INT",
    "D",
    "SI",
    "INV",
    "INV_final",  # separate entry: a missing comma here would merge it with "TD"
    "TD",
    "LI",
    "BP",
    "CloseEndMapped",
    "RP",
]


def split_file_name(file_parts, file_ending_index):
    """
    Re-assemble a "_"-split file name into its prefix and its ending.

    :params file_parts: file name split on "_", e.g. ["all", "INV", "final"]
    :params file_ending_index: index (usually negative) of the first part that
        belongs to the ending, e.g. -1 for "D" or -2 for "INV_final"
    :return: tuple (name, ending), e.g. ("all", "INV_final")
    """
    return (
        "_".join(file_parts[:file_ending_index]),
        # Slice (not a single index): joining one string would interleave "_"
        # between its characters and drop any trailing parts such as "final".
        "_".join(file_parts[file_ending_index:]),
    )


def process_input_path(input_file):
    """
    Split a pindel output path into its directory and its file name prefix.

    :params input_file: Input file from rule, e.g. /path/to/file/all_D or /path/to/file/all_INV_final
    :return: directory and file name without the ending, e.g. "/path/to/file", "all"
    :raises Exception: if the ending is not listed in expected_endings
    """
    directory, base_name = os.path.split(input_file)
    # Separate ending and name: a "_final" ending starts two "_"-separated
    # parts from the end (INV_final), any other ending is the last part (D).
    if base_name.endswith("_final"):
        ending_index = -2
    else:
        ending_index = -1
    prefix, variant_type = split_file_name(base_name.split("_"), ending_index)
    if variant_type not in expected_endings:
        raise Exception("Unexpected variant type: " + variant_type)
    return directory, prefix


# Run pindel2vcf. A single input file is passed directly with -p. Several
# input files are symlinked into a temporary directory under their original
# base names and passed as a common prefix with -P; the command must run
# inside the "with" block so the links still exist.
with tempfile.TemporaryDirectory() as tmpdirname:
    input_flag = "-p"
    input_file = snakemake.input.get("pindel")
    if isinstance(input_file, list) and len(input_file) > 1:
        input_flag = "-P"
        variant_inputs = input_file
        # Directory and file name prefix are derived from the first file; every
        # other file has to start with the same path prefix.
        input_path, input_name = process_input_path(variant_inputs[0])
        common_prefix = os.path.join(input_path, input_name)
        for variant_input in variant_inputs:
            if not variant_input.startswith(common_prefix):
                raise Exception(
                    "Unable to extract common path from multi file input, expect path is: "
                    + common_prefix
                )
            if not os.path.isfile(variant_input):
                # Report the offending file, not the shared prefix.
                raise Exception('Input "' + variant_input + '" is not a file!')
            os.symlink(
                os.path.abspath(variant_input),
                os.path.join(tmpdirname, os.path.basename(variant_input)),
            )
        input_file = os.path.join(tmpdirname, input_name)
    # Use the module-level "extra" (defaults to "") rather than
    # snakemake.params.extra, which fails when the rule defines no extra param.
    shell(
        "pindel2vcf {extra} {input_flag} {input_file} -r {snakemake.input.ref} -R {snakemake.params.refname} -d {snakemake.params.refdate} -v {snakemake.output[0]} {log}"
    )