SALMON_QUANT

Quantify transcripts with salmon

Software dependencies

  • salmon ==0.10.0

Example

This wrapper can be used in the following way:

rule salmon_quant_reads:
    input:
        # If you have multiple fastq files for a single sample (e.g. technical replicates)
        # use a list for r1 and r2.
        r1 = "reads/{sample}_1.fq.gz",
        r2 = "reads/{sample}_2.fq.gz",
        index = "salmon/transcriptome_index"
    output:
        quant = 'salmon/{sample}/quant.sf',
        lib = 'salmon/{sample}/lib_format_counts.json'
    log:
        'logs/salmon/{sample}.log'
    params:
        # optional parameters
        libtype ="A",
        #zip_ext = bz2 # req'd for bz2 files ('bz2'); optional for gz files('gz')
        extra=""
    threads: 2
    wrapper:
        "0.31.0/bio/salmon/quant"

Note that input, output and log file paths can be chosen freely. When running with

snakemake --use-conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Authors

  • Tessa Pierce

Code

"""Snakemake wrapper for Salmon Quant"""

__author__ = "Tessa Pierce"
__copyright__ = "Copyright 2018, Tessa Pierce"
__email__ = "ntpierce@gmail.com"
__license__ = "MIT"

from os import path
from snakemake.shell import shell

def manual_decompression (reads, zip_ext):
    """ Allow *.bz2 input into salmon. Also provide same
    decompression for *gz files, as salmon devs mention
    it may be faster in some cases."""
    if zip_ext and reads:
        if zip_ext == 'bz2':
            reads = ' < (bunzip2 -c ' + reads + ')'
        elif zip_ext == 'gz':
            reads = ' < (gunzip -c ' + reads + ')'
    return reads

extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=False, stderr=True)
zip_extension = snakemake.params.get("zip_extension", "")
libtype = snakemake.params.get("libtype", "A")

r1 = snakemake.input.get("r1")
r2 =  snakemake.input.get("r2")
r = snakemake.input.get("r")

assert (r1 is not None and r2 is not None) or r is not None, "either r1 and r2 (paired), or r (unpaired) are required as input"
if r1:
    r1 = [snakemake.input.r1] if isinstance(snakemake.input.r1, str) else snakemake.input.r1
    r2 = [snakemake.input.r2] if isinstance(snakemake.input.r2, str) else snakemake.input.r2
    assert len(r1) == len(r2), "input-> equal number of files required for r1 and r2"
    r1_cmd = ' -1 ' + manual_decompression(" ".join(r1), zip_extension)
    r2_cmd = ' -2 ' + manual_decompression(" ".join(r2), zip_extension)
    read_cmd = " ".join([r1_cmd,r2_cmd])
if r:
    assert r1 is None and r2 is None, "Salmon cannot quantify mixed paired/unpaired input files. Please input either r1,r2 (paired) or r (unpaired)"
    r = [snakemake.input.r] if isinstance(snakemake.input.r, str) else snakemake.input.r
    read_cmd = ' -r ' + manual_decompression(" ".join(r), zip_extension)

outdir = path.dirname(snakemake.output.get('quant'))

shell("salmon quant -i {snakemake.input.index} "
      " -l {libtype} {read_cmd} -o {outdir} "
      " -p {snakemake.threads} {extra} {log} ")