CALC_CONSENSUS_READS
Performs consensus read calculation on a marked bam and merges single, paired and skipped-consensus reads into a single sorted bam file.
Usage
Via copy-paste
This meta-wrapper does not yet use Snakemake’s pathvars syntax. You can best directly copy-paste and modify the full meta-wrapper code below into your workflow.
Execution
When running with
snakemake --sdm conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Used wrappers
The following individual wrappers are used in this meta-wrapper:
Please refer to each wrapper in above list for additional configuration parameters and information about the executed code.
Code
import os.path
rule calc_consensus_reads:
input:
# sorted bam file
"<mapped_reads>",
output:
# non-overlapping consensus read pairs will be written into consensus_r1 and consensus_r2
consensus_r1=temp("<results>/consensus_reads/{sample}.1.fq"),
consensus_r2=temp("<results>/consensus_reads/{sample}.2.fq"),
# consensus reads from single end records or overlapping read pairs will be merged into a single end record
consensus_se=temp("<results>/consensus_reads/{sample}.se.fq"),
# skipped reads (soft-clipped or unpropper mapped reads) will be skipped and unmarked
skipped=temp("<results>/consensus_reads/{sample}.skipped.bam"),
params:
extra="",
log:
"<logs>/consensus/{sample}.log",
wrapper:
"v3.9.0/bio/rbt/collapse_reads_to_fragments-bam"
rule bwa_index:
input:
"<genome_sequence>",
output:
idx=multiext("<resources>/bwa_index/genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
log:
"<logs>/bwa_index.log",
wrapper:
"v5.10.0/bio/bwa/index"
rule map_consensus_reads:
input:
reads=lambda wc: expand(
"<results>/consensus_reads/{sample}.{read}.fq",
sample=wc.sample,
read="se" if wc.read_type == "se" else (1, 2),
),
idx=multiext("<resources>/bwa_index/genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
output:
temp("<results>/consensus_mapped/{sample}.{read_type}.bam"),
params:
extra=r"-C -R '@RG\tID:{sample}\tSM:{sample}'",
index=lambda w, input: os.path.splitext(input.idx[0])[0],
sort="samtools",
sort_order="coordinate",
wildcard_constraints:
read_type="pe|se",
log:
"<logs>/bwa_mem/{sample}.{read_type}.consensus.log",
threads: 8
wrapper:
"v7.6.0/bio/bwa/mem"
rule sort_skipped_reads:
input:
"<results>/consensus_reads/{sample}.skipped.bam",
output:
temp("<results>/consensus_reads/{sample}.skipped.sorted.bam"),
params:
extra="-m 4G",
tmp_dir="/tmp/",
log:
"<logs>/sort_consensus/{sample}.log",
# Samtools takes additional threads through its option -@
threads: 8 # This value - 1 will be sent to -@.
wrapper:
"v7.6.0/bio/samtools/sort"
rule mark_duplicates_skipped:
input:
bams=["<results>/consensus_reads/{sample}.skipped.sorted.bam"],
output:
bam=temp("<results>/consensus_dupmarked/{sample}.skipped.marked.bam"),
metrics="<results>/consensus_dupmarked/{sample}.skipped.metrics.txt",
log:
"<logs>/picard/marked/{sample}.log",
params:
extra="--VALIDATION_STRINGENCY LENIENT --TAG_DUPLICATE_SET_MEMBERS true",
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024,
wrapper:
"v7.6.0/bio/picard/markduplicates"
rule merge_consensus_reads:
input:
"<results>/consensus_dupmarked/{sample}.skipped.marked.bam",
"<results>/consensus_mapped/{sample}.se.bam",
"<results>/consensus_mapped/{sample}.pe.bam",
output:
"<results>/consensus/{sample}.bam",
log:
"<logs>/samtools_merge/{sample}.log",
threads: 8
wrapper:
"v7.6.0/bio/samtools/merge"