HOMER ANNOTATEPEAKS

Performs peak annotation to associate peaks with nearby genes. For more information, please see the HOMER documentation.

URL: http://homer.ucsd.edu/homer/ngs/annotation.html

Example

This wrapper can be used in the following way:

rule homer_annotatepeaks:
    input:
        peaks="peaks_refs/{sample}.peaks",
        genome="peaks_refs/gene.fasta",
        # optional input files
        # gtf="", # implicitly sets the -gtf flag
        # gene="", # implicitly sets the -gene flag for gene data file to add gene expression or other data types
        motif_files="peaks_refs/motives.txt", # implicitly sets the -m flag
        # filter_motiv="", # implicitly sets the -fm flag
        # center="",  # implicitly sets the -center flag
        nearest_peak="peaks_refs/b.peaks", # implicitly sets the -p flag
        # tag="",  # implicitly sets the -d flag for tagDirectories
        # vcf="", # implicitly sets the -vcf flag
        # bed_graph="", # implicitly sets the -bedGraph flag
        # wig="", # implicitly sets the -wig flag
        # map="", # implicitly sets the -map flag
        # cmp_genome="", # implicitly sets the -cmpGenome flag
        # cmp_Liftover="", # implicitly sets the -cmpLiftover flag
        # advanced_annotation=""  # optional, implicitly sets the -ann flag, see http://homer.ucsd.edu/homer/ngs/advancedAnnotation.html
    output:
        annotations="{sample}_annot.txt",
        # optional output, implicitly sets the -matrix flag, requires motif_files as input
        matrix=multiext("{sample}",
                        ".count.matrix.txt",
                        ".ratio.matrix.txt",
                        ".logPvalue.matrix.txt",
                        ".stats.txt"
                        ),
        # optional output, implicitly sets the -mfasta flag, requires motif_files as input
        mfasta="{sample}_motif.fasta",
        # optional output, implicitly sets the -mbed flag, requires motif_files as input
        mbed="{sample}_motif.bed",
        # optional output, implicitly sets the -mlogic flag, requires motif_files as input
        mlogic="{sample}_motif.logic"
    threads:
        2
    params:
        mode="", # add tss, tts or rna mode and options here, e.g. "tss mm8"
        extra="-gid"  # optional params, see http://homer.ucsd.edu/homer/ngs/annotation.html
    log:
        "logs/annotatePeaks/{sample}.log"
    wrapper:
        "v1.2.1/bio/homer/annotatePeaks"

Note that input, output and log file paths can be chosen freely.

When running with

snakemake --use-conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Software dependencies

  • homer==4.11

Input/Output

Input:

  • peak or BED file
  • various optional input files, e.g. gtf, bedGraph, wiggle

Output:

  • annotation file (.txt)
  • various optional output files

Authors

  • Antonie Vietor

Code

__author__ = "Antonie Vietor"
__copyright__ = "Copyright 2020, Antonie Vietor"
__email__ = "antonie.v@gmx.de"
__license__ = "MIT"

import os
import sys

from snakemake.shell import shell

# Shell fragment derived from the rule's log setting; it is appended to the
# end of the annotatePeaks.pl command line.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# User-supplied settings; each falls back to an empty string when the rule
# does not define it.
extra = snakemake.params.get("extra", "")
matrix = snakemake.output.get("matrix", "")
motif_files = snakemake.input.get("motif_files", "")

# The genome is passed to annotatePeaks.pl as a positional argument, so a
# missing genome input is replaced by the literal placeholder "none".
genome = snakemake.input.get("genome", "")
genome = "none" if genome == "" else genome

# Optional files: maps each optional Snakemake input/output name to the
# annotatePeaks.pl flag that is added to the command line when it is set.
opt_files = {
    "gtf": "-gtf",
    "gene": "-gene",
    "motif_files": "-m",
    "filter_motiv": "-fm",
    "center": "-center",
    "nearest_peak": "-p",
    "tag": "-d",
    "vcf": "-vcf",
    "bed_graph": "-bedGraph",
    "wig": "-wig",
    "map": "-map",
    "cmp_genome": "-cmpGenome",
    "cmp_Liftover": "-cmpLiftover",
    "advanced_annotation": "-ann",
    "mfasta": "-mfasta",
    "mbed": "-mbed",
    "mlogic": "-mlogic",
}

# These entries are looked up in the rule's outputs (not its inputs) and are
# only valid when motif files were supplied.
motif_outputs = ("mfasta", "mbed", "mlogic")

requires_motives = False
for name, flag in opt_files.items():
    if name in motif_outputs:
        path = snakemake.output.get(name, "")
        if path:
            requires_motives = True
    else:
        path = snakemake.input.get(name, "")
    if path:
        extra += " {flag} {file}".format(flag=flag, file=path)

# Abort with a readable message (requires `sys` to be imported at the top of
# the file) when a motif-dependent output is requested without motif files.
if requires_motives and motif_files == "":
    sys.exit(
        "The optional output files require motif_file(s) as input. For more information please see http://homer.ucsd.edu/homer/ngs/annotation.html."
    )

# Optional matrix output files: the -matrix flag takes a prefix, which is
# derived from the output whose name ends in ".count.matrix.txt".
if matrix:
    if motif_files == "":
        sys.exit(
            "The matrix output files require motif_file(s) as input. For more information please see http://homer.ucsd.edu/homer/ngs/annotation.html."
        )
    ext = ".count.matrix.txt"
    matrix_out = next((out for out in snakemake.output if out.endswith(ext)), None)
    if matrix_out is None:
        # Previously this surfaced as a bare IndexError with no explanation.
        sys.exit(
            "The matrix output files must include a file ending in '{}' to derive the -matrix prefix.".format(
                ext
            )
        )
    # NOTE(review): basename() drops the directory part of the output path, so
    # only the file-name prefix is handed to -matrix -- confirm this matches
    # where the matrix files are expected when outputs live in a subdirectory.
    matrix_name = os.path.basename(matrix_out[: -len(ext)])
    extra += " -matrix {}".format(matrix_name)

# Command template for annotatePeaks.pl. The placeholders are filled in by
# snakemake's shell() from the `snakemake` object and the module-level
# `genome`, `extra` and `log` values; the annotation table is written via
# stdout redirection to the `annotations` output.
annotate_cmd = (
    "(annotatePeaks.pl {snakemake.params.mode} {snakemake.input.peaks}"
    " {genome} {extra} -cpu {snakemake.threads}"
    " > {snakemake.output.annotations}) {log}"
)

shell(annotate_cmd)