CALC_CONSENSUS_READS

Performs consensus read calculation on a marked bam and merges single, paired and skipped-consensus reads into a single sorted bam file.

Usage

Via copy-paste

This meta-wrapper does not yet use Snakemake’s pathvars syntax. You can best directly copy-paste and modify the full meta-wrapper code below into your workflow.

Execution

When running with

snakemake --sdm conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Used wrappers

The following individual wrappers are used in this meta-wrapper:

Please refer to each wrapper in above list for additional configuration parameters and information about the executed code.

Authors

Code

import os.path

rule calc_consensus_reads:
    input:
        # sorted bam file
        "<mapped_reads>",
    output:
        # non-overlapping consensus read pairs will be written into consensus_r1 and consensus_r2
        consensus_r1=temp("<results>/consensus_reads/{sample}.1.fq"),
        consensus_r2=temp("<results>/consensus_reads/{sample}.2.fq"),
        # consensus reads from single end records or overlapping read pairs will be merged into a single end record
        consensus_se=temp("<results>/consensus_reads/{sample}.se.fq"),
        # skipped reads (soft-clipped or unpropper mapped reads) will be skipped and unmarked
        skipped=temp("<results>/consensus_reads/{sample}.skipped.bam"),
    params:
        extra="",
    log:
        "<logs>/consensus/{sample}.log",
    wrapper:
        "v3.9.0/bio/rbt/collapse_reads_to_fragments-bam"


rule bwa_index:
    input:
        "<genome_sequence>",
    output:
        idx=multiext("<resources>/bwa_index/genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
    log:
        "<logs>/bwa_index.log",
    wrapper:
        "v5.10.0/bio/bwa/index"


rule map_consensus_reads:
    input:
        reads=lambda wc: expand(
            "<results>/consensus_reads/{sample}.{read}.fq",
            sample=wc.sample,
            read="se" if wc.read_type == "se" else (1, 2),
        ),
        idx=multiext("<resources>/bwa_index/genome", ".amb", ".ann", ".bwt", ".pac", ".sa"),
    output:
        temp("<results>/consensus_mapped/{sample}.{read_type}.bam"),
    params:
        extra=r"-C -R '@RG\tID:{sample}\tSM:{sample}'",
        index=lambda w, input: os.path.splitext(input.idx[0])[0],
        sort="samtools",
        sort_order="coordinate",
    wildcard_constraints:
        read_type="pe|se",
    log:
        "<logs>/bwa_mem/{sample}.{read_type}.consensus.log",
    threads: 8
    wrapper:
        "v7.6.0/bio/bwa/mem"


rule sort_skipped_reads:
    input:
        "<results>/consensus_reads/{sample}.skipped.bam",
    output:
        temp("<results>/consensus_reads/{sample}.skipped.sorted.bam"),
    params:
        extra="-m 4G",
        tmp_dir="/tmp/",
    log:
        "<logs>/sort_consensus/{sample}.log",
    # Samtools takes additional threads through its option -@
    threads: 8  # This value - 1 will be sent to -@.
    wrapper:
        "v7.6.0/bio/samtools/sort"


rule mark_duplicates_skipped:
    input:
        bams=["<results>/consensus_reads/{sample}.skipped.sorted.bam"],
    output:
        bam=temp("<results>/consensus_dupmarked/{sample}.skipped.marked.bam"),
        metrics="<results>/consensus_dupmarked/{sample}.skipped.metrics.txt",
    log:
        "<logs>/picard/marked/{sample}.log",
    params:
        extra="--VALIDATION_STRINGENCY LENIENT --TAG_DUPLICATE_SET_MEMBERS true",
    # optional specification of memory usage of the JVM that snakemake will respect with global
    # resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
    # and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
    # https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
    resources:
        mem_mb=1024,
    wrapper:
        "v7.6.0/bio/picard/markduplicates"


rule merge_consensus_reads:
    input:
        "<results>/consensus_dupmarked/{sample}.skipped.marked.bam",
        "<results>/consensus_mapped/{sample}.se.bam",
        "<results>/consensus_mapped/{sample}.pe.bam",
    output:
        "<results>/consensus/{sample}.bam",
    log:
        "<logs>/samtools_merge/{sample}.log",
    threads: 8
    wrapper:
        "v7.6.0/bio/samtools/merge"