STAR

Map reads with STAR.

URL:

Example

This wrapper can be used in the following way:

rule star_pe_multi:
    input:
        # use a list for multiple fastq files for one sample
        # usually technical replicates across lanes/flowcells
        fq1=["reads/{sample}_R1.1.fastq", "reads/{sample}_R1.2.fastq"],
        # paired end reads needs to be ordered so each item in the two lists match
        fq2=["reads/{sample}_R2.1.fastq", "reads/{sample}_R2.2.fastq"],  #optional
    output:
        # see STAR manual for additional output files
        sam="star/pe/{sample}/Aligned.out.sam",
        log="star/pe/{sample}/Log.out",
    log:
        "logs/star/pe/{sample}.log",
    params:
        # path to STAR reference genome index
        idx="index",
        # optional parameters
        extra="",
    threads: 8
    wrapper:
        "v1.2.1/bio/star/align"


rule star_se:
    input:
        fq1="reads/{sample}_R1.1.fastq",
    output:
        # see STAR manual for additional output files
        sam="star/se/{sample}/Aligned.out.sam",
        log="star/se/{sample}/Log.out",
    log:
        "logs/star/se/{sample}.log",
    params:
        # path to STAR reference genome index
        idx="index",
        # optional parameters
        extra="",
    threads: 8
    wrapper:
        "v1.2.1/bio/star/align"

Note that input, output and log file paths can be chosen freely.

When running with

snakemake --use-conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Software dependencies

  • star=2.7

Notes

Authors

  • Johannes Köster
  • Tomás Di Domenico
  • Filipe G. Vieira

Code

__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "koester@jimmy.harvard.edu"
__license__ = "MIT"


import os
import tempfile
from snakemake.shell import shell

extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

fq1 = snakemake.input.get("fq1")
assert fq1 is not None, "input-> fq1 is a required input parameter"
fq1 = (
    [snakemake.input.fq1]
    if isinstance(snakemake.input.fq1, str)
    else snakemake.input.fq1
)
fq2 = snakemake.input.get("fq2")
if fq2:
    fq2 = (
        [snakemake.input.fq2]
        if isinstance(snakemake.input.fq2, str)
        else snakemake.input.fq2
    )
    assert len(fq1) == len(
        fq2
    ), "input-> equal number of files required for fq1 and fq2"
input_str_fq1 = ",".join(fq1)
input_str_fq2 = ",".join(fq2) if fq2 is not None else ""
input_str = " ".join([input_str_fq1, input_str_fq2])

if fq1[0].endswith(".gz"):
    readcmd = "--readFilesCommand zcat"
else:
    readcmd = ""

index = snakemake.input.get("idx")
if not index:
    index = snakemake.params.get("idx", "")

with tempfile.TemporaryDirectory() as tmpdir:
    shell(
        "STAR "
        " --runThreadN {snakemake.threads}"
        " --genomeDir {index}"
        " --readFilesIn {input_str}"
        " {readcmd}"
        " {extra}"
        " --outTmpDir {tmpdir}/temp"
        " --outFileNamePrefix {tmpdir}/"
        " --outStd Log "
        " {log}"
    )

    if "SortedByCoordinate" in extra:
        bamprefix = "Aligned.sortedByCoord.out"
    else:
        bamprefix = "Aligned.out"

    if snakemake.output.get("bam"):
        shell("cat {tmpdir}/{bamprefix}.bam > {snakemake.output.bam:q}")
    if snakemake.output.get("sam"):
        shell("cat {tmpdir}/{bamprefix}.sam > {snakemake.output.sam:q}")
    if snakemake.output.get("reads_per_gene"):
        shell("cat {tmpdir}/ReadsPerGene.out.tab > {snakemake.output.reads_per_gene:q}")
    if snakemake.output.get("chim_junc"):
        shell("cat {tmpdir}/Chimeric.out.junction > {snakemake.output.chim_junc:q}")
    if snakemake.output.get("sj"):
        shell("cat {tmpdir}/SJ.out.tab > {snakemake.output.sj:q}")
    if snakemake.output.get("log"):
        shell("cat {tmpdir}/Log.out > {snakemake.output.log:q}")
    if snakemake.output.get("log_progress"):
        shell("cat {tmpdir}/Log.progress.out > {snakemake.output.log_progress:q}")
    if snakemake.output.get("log_final"):
        shell("cat {tmpdir}/Log.final.out > {snakemake.output.log_final:q}")