SALMON QUANT
Quantify transcripts with salmon
URL: https://salmon.readthedocs.io/en/latest/salmon.html#quantifying-in-mapping-based-mode
Example
This wrapper can be used in the following way:
rule salmon_quant_pe_list:
input:
# If you have multiple fastq files for a single sample (e.g. technical replicates)
# use a list for r1 and r2.
r1="reads/{sample}_1.fq.gz",
r2="reads/{sample}_2.fq.gz",
index=multiext(
"salmon/transcriptome_index/",
"duplicate_clusters.tsv",
"index.ctab",
"index.ectab",
"index.refinfo",
"index.ssi",
"index.ssi.mphf",
"index.tct",
"index.tdct",
"info.json",
"refseq.bin",
"refseq_offsets.json",
),
output:
quant="salmon_pe_list/{sample}/quant.sf",
lib="salmon_pe_list/{sample}/lib_format_counts.json",
log:
"logs/salmon_pe_list/{sample}.log",
threads: 2
params:
# optional parameters
libtype="A",
extra="",
wrapper:
"v9.11.0/bio/salmon/quant"
rule salmon_quant_pe:
input:
# If you have multiple fastq files for a single sample (e.g. technical replicates)
# use a list for r1 and r2.
r1="reads/{sample}_1.fq.gz",
r2="reads/{sample}_2.fq.gz",
index="salmon/transcriptome_index",
output:
quant="salmon_pe/{sample}/quant.sf",
lib="salmon_pe/{sample}/lib_format_counts.json",
log:
"logs/salmon_pe/{sample}.log",
threads: 2
params:
# optional parameters
libtype="A",
extra="",
wrapper:
"v9.11.0/bio/salmon/quant"
rule salmon_quant_pe_multi:
input:
# If you have multiple fastq files for a single sample (e.g. technical replicates, flowcells),
# use a list for multiple fastq files for each sample.
r1=["reads/a_1.fq.gz", "reads/b_1.fq.gz"],
r2=["reads/a_2.fq.gz", "reads/b_2.fq.gz"],
index="salmon/transcriptome_index",
output:
quant="salmon_pe_multi/ab_pe_x_transcriptome/quant.sf",
lib="salmon_pe_multi/ab_pe_x_transcriptome/lib_format_counts.json",
log:
"logs/salmon/ab_pe_x_transcriptome.log",
threads: 2
params:
# optional parameters
libtype="A",
extra="",
wrapper:
"v9.11.0/bio/salmon/quant"
rule salmon_quant_se:
input:
r="reads/{sample}.fq.gz",
index="salmon/transcriptome_index",
output:
quant="salmon_se/{sample}_x_transcriptome/quant.sf",
lib="salmon_se/{sample}_x_transcriptome/lib_format_counts.json",
log:
"logs/salmon_se/{sample}_x_transcriptome.log",
threads: 2
params:
# optional parameters
libtype="A",
extra="",
wrapper:
"v9.11.0/bio/salmon/quant"
rule salmon_quant_se_bz2:
input:
r="reads/{sample}.fq.bz2",
index="salmon/transcriptome_index",
output:
quant="salmon_se_bz2/{sample}_x_transcriptome/quant.sf",
lib="salmon_se_bz2/{sample}_x_transcriptome/lib_format_counts.json",
log:
"logs/salmon_se_bz2/{sample}_x_transcriptome.log",
threads: 2
params:
# optional parameters
libtype="A",
extra="",
wrapper:
"v9.11.0/bio/salmon/quant"
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Notes
Salmon accepted either a list of unpaired reads (r parameter), or two lists of the same length containing paired reads (r1 and r2 parameters). Not both.
Software dependencies
salmon=2.0.1gzip=1.14bzip2=1.0.8
Input/Output
Input:
index: Path to Salmon indexed sequences, see bio/salmon/indexgtf: Optional path to a GTF formatted genome annotationr: Path to unpaired readsr1: Path to upstream reads file.r2: Path to downstream reads file.
Output:
Path to quantification file
bam: Path to pseudo-bam file
Params
libType: Format string describing the library type, see official documentation on Library Types for list of accepted values.extra: Optional command line parameters, besides IO parameters and threads.
Code
"""Snakemake wrapper for Salmon Quant"""
__author__ = "Tessa Pierce"
__copyright__ = "Copyright 2018, Tessa Pierce"
__email__ = "ntpierce@gmail.com"
__license__ = "MIT"
from os.path import dirname
from snakemake.shell import shell
class MixedPairedUnpairedInput(Exception):
def __init__(self):
super().__init__(
"Salmon cannot quantify mixed paired/unpaired input files. "
"Please input either `r1`, `r2` (paired) or `r` (unpaired)"
)
class MissingMateError(Exception):
def __init__(self):
super().__init__(
"Salmon requires an equal number of paired reads in `r1` and `r2`,"
" or a list of unpaired reads `r`"
)
def uncompress_bz2(snake_io, salmon_threads):
"""
Provide bzip2 on-the-fly decompression
For each of these b-unzipping, a thread will be used. Therefore, the maximum number of threads given to Salmon
shall be reduced by one in order not to be killed on a cluster.
"""
# Asking forgiveness instead of permission
try:
# If no error are raised, then we have a string.
if snake_io.endswith("bz2"):
return [f"<( bzip2 --decompress --stdout {snake_io} )"], salmon_threads - 1
return [snake_io], salmon_threads
except AttributeError:
# As an error has been raise, we have a list of fastq files.
fq_files = []
for fastq in snake_io:
if fastq.endswith("bz2"):
fq_files.append(f"<( bzip2 --decompress --stdout {fastq} )")
salmon_threads -= 1
else:
fq_files.append(fastq)
return fq_files, salmon_threads
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
libtype = snakemake.params.get("libtype", "A")
max_threads = snakemake.threads
extra = snakemake.params.get("extra", "")
if "--validateMappings" in extra:
raise DeprecationWarning("`--validateMappings` is deprecated and has no effect")
r1 = snakemake.input.get("r1")
r2 = snakemake.input.get("r2")
r = snakemake.input.get("r")
if all(mate is not None for mate in [r1, r2]):
r1, max_threads = uncompress_bz2(r1, max_threads)
r2, max_threads = uncompress_bz2(r2, max_threads)
if len(r1) != len(r2):
raise MissingMateError()
if r is not None:
raise MixedPairedUnpairedInput()
r1_cmd = " --mates1 {}".format(" ".join(r1))
r2_cmd = " --mates2 {}".format(" ".join(r2))
read_cmd = " ".join([r1_cmd, r2_cmd])
elif r is not None:
if any(mate is not None for mate in [r1, r2]):
raise MixedPairedUnpairedInput()
r, max_threads = uncompress_bz2(r, max_threads)
read_cmd = " --unmatedReads {}".format(" ".join(r))
else:
raise MissingMateError()
gene_map = snakemake.input.get("gtf", "")
if gene_map:
gene_map = f"--geneMap {gene_map}"
bam = snakemake.output.get("bam", "")
if bam:
bam = f"--writeMappings {bam}"
outdir = dirname(snakemake.output.get("quant"))
index = snakemake.input["index"]
if isinstance(index, list):
index = dirname(index[0])
if max_threads < 1:
raise ValueError(
"On-the-fly b-unzipping have raised the required number of threads. "
f"Please request at least {1 - max_threads} more threads."
)
shell(
"salmon quant --index {index} "
" --libType {libtype} {read_cmd} --output {outdir} {gene_map} "
" --threads {max_threads} {extra} {bam} {log}"
)