.. _`bio/star/align`: STAR ==== .. image:: https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/bio/star/align?label=version%20update%20pull%20requests :target: https://github.com/snakemake/snakemake-wrappers/pulls?q=is%3Apr+is%3Aopen+label%3Abio/star/align Map reads with STAR. **URL**: https://github.com/alexdobin/STAR Example ------- This wrapper can be used in the following way: .. code-block:: python rule star_pe_multi: input: # use a list for multiple fastq files for one sample # usually technical replicates across lanes/flowcells fq1=["reads/{sample}_R1.1.fastq", "reads/{sample}_R1.2.fastq"], # paired end reads needs to be ordered so each item in the two lists match fq2=["reads/{sample}_R2.1.fastq", "reads/{sample}_R2.2.fastq"], #optional # path to STAR reference genome index idx="index", output: # see STAR manual for additional output files aln="star/pe/{sample}/pe_aligned.sam", log="logs/pe/{sample}/Log.out", sj="star/pe/{sample}/SJ.out.tab", unmapped=["star/pe/{sample}/unmapped.1.fastq.gz","star/pe/{sample}/unmapped.2.fastq.gz"], log: "logs/pe/{sample}.log", params: # optional parameters extra="", threads: 8 wrapper: "v3.0.1/bio/star/align" rule star_se: input: fq1="reads/{sample}_R1.1.fastq", # path to STAR reference genome index idx="index", output: # see STAR manual for additional output files aln="star/se/{sample}/se_aligned.bam", log="logs/se/{sample}/Log.out", log_final="logs/se/{sample}/Log.final.out", unmapped="star/se/{sample}/unmapped.fastq", log: "logs/se/{sample}.log", params: # optional parameters extra="--outSAMtype BAM Unsorted", threads: 8 wrapper: "v3.0.1/bio/star/align" Note that input, output and log file paths can be chosen freely. When running with .. code-block:: bash snakemake --use-conda the software dependencies will be automatically deployed into an isolated environment before execution. Notes ----- * The `extra` param allows for additional program arguments. * It is advisable to consider updating the limits setting before running STAR, such as executing `ulimit -n 10000`, to avoid an issue like this: https://github.com/alexdobin/STAR/issues/1344 Software dependencies --------------------- * ``star=2.7.11a`` Authors ------- * Johannes Köster * Tomás Di Domenico * Filipe G. Vieira * Kashyap Chhatbar Code ---- .. code-block:: python __author__ = "Johannes Köster" __copyright__ = "Copyright 2016, Johannes Köster" __email__ = "koester@jimmy.harvard.edu" __license__ = "MIT" import os import tempfile from snakemake.shell import shell extra = snakemake.params.get("extra", "") log = snakemake.log_fmt_shell(stdout=False, stderr=True) fq1 = snakemake.input.get("fq1") assert fq1 is not None, "input-> fq1 is a required input parameter" fq1 = ( [snakemake.input.fq1] if isinstance(snakemake.input.fq1, str) else snakemake.input.fq1 ) fq2 = snakemake.input.get("fq2") if fq2: fq2 = ( [snakemake.input.fq2] if isinstance(snakemake.input.fq2, str) else snakemake.input.fq2 ) assert len(fq1) == len( fq2 ), "input-> equal number of files required for fq1 and fq2" input_str_fq1 = ",".join(fq1) input_str_fq2 = ",".join(fq2) if fq2 is not None else "" input_str = " ".join([input_str_fq1, input_str_fq2]) if fq1[0].endswith(".gz"): readcmd = "--readFilesCommand gunzip -c" elif fq1[0].endswith(".bz2"): readcmd = "--readFilesCommand bunzip2 -c" else: readcmd = "" out_unmapped = snakemake.output.get("unmapped", "") if out_unmapped: out_unmapped = "--outReadsUnmapped Fastx" index = snakemake.input.get("idx") if not index: index = snakemake.params.get("idx", "") if "--outSAMtype BAM SortedByCoordinate" in extra: stdout = "BAM_SortedByCoordinate" elif "BAM Unsorted" in extra: stdout = "BAM_Unsorted" else: stdout = "SAM" with tempfile.TemporaryDirectory() as tmpdir: shell( "STAR " " --runThreadN {snakemake.threads}" " --genomeDir {index}" " --readFilesIn {input_str}" " {readcmd}" " {extra}" " {out_unmapped}" " --outTmpDir {tmpdir}/STARtmp" " --outFileNamePrefix {tmpdir}/" " --outStd {stdout}" " > {snakemake.output.aln}" " {log}" ) if snakemake.output.get("reads_per_gene"): shell("cat {tmpdir}/ReadsPerGene.out.tab > {snakemake.output.reads_per_gene:q}") if snakemake.output.get("chim_junc"): shell("cat {tmpdir}/Chimeric.out.junction > {snakemake.output.chim_junc:q}") if snakemake.output.get("sj"): shell("cat {tmpdir}/SJ.out.tab > {snakemake.output.sj:q}") if snakemake.output.get("log"): shell("cat {tmpdir}/Log.out > {snakemake.output.log:q}") if snakemake.output.get("log_progress"): shell("cat {tmpdir}/Log.progress.out > {snakemake.output.log_progress:q}") if snakemake.output.get("log_final"): shell("cat {tmpdir}/Log.final.out > {snakemake.output.log_final:q}") unmapped = snakemake.output.get("unmapped") if unmapped: # SE if not fq2: unmapped = [unmapped] for i, out_unmapped in enumerate(unmapped, 1): cmd = "gzip -c" if out_unmapped.endswith("gz") else "cat" shell("{cmd} {tmpdir}/Unmapped.out.mate{i} > {out_unmapped}") .. |nl| raw:: html