.. _`bio/deseq2/wald`: DESEQ2 ====== .. image:: https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/bio/deseq2/wald?label=version%20update%20pull%20requests :target: https://github.com/snakemake/snakemake-wrappers/pulls?q=is%3Apr+is%3Aopen+label%3Abio/deseq2/wald Call differentially expressed genes with DESeq2 **URL**: https://bioconductor.org/packages/3.16/bioc/html/DESeq2.html Example ------- This wrapper can be used in the following way: .. code-block:: python rule test_deseq2_wald: input: dds="dds.RDS", output: wald_rds="wald.RDS", wald_tsv="dge.tsv", deseq2_result_dir=directory("deseq_results"), normalized_counts_table="counts.tsv", normalized_counts_rds="counts.RDS", params: deseq_extra="", shrink_extra="", results_extra="", contrast=["condition", "A", "B"], threads: 1 log: "logs/deseq2.log", wrapper: "v3.0.1/bio/deseq2/wald" Note that input, output and log file paths can be chosen freely. When running with .. code-block:: bash snakemake --use-conda the software dependencies will be automatically deployed into an isolated environment before execution. Software dependencies --------------------- * ``bioconductor-deseq2=1.40.2`` * ``bioconductor-biocparallel=1.34.2`` * ``r-ashr=2.2_63`` Input/Output ------------ **Input:** * ``dds``: Path to RDS-formatted DESeq2-object **Output:** * ``wald_rds``: Optional path to wald test results (RDS formatted) * ``wald_tsv``: Optional path to wald test results (TSV formatted). Required optional parameter `contrast` (see below) * ``deseq2_result_dir``: Optional path to a directory that shall contain all DESeq2 results for each comparison (each file is TSV formatted) * ``normalized_counts_table``: Optional path to normalized counts (TSV formatted) * ``normalized_counts_rds``: Optional path to normalized counts (RDS formatted) Params ------ * ``deseq_extra``: Optional parameters provided to the function `DESeq()` * ``schrink_extra``: Optional parameters provided to the function `lfSchrink()` * ``results_extra``: Optional parameters provided to the function `result()` * ``contrast``: List of characters. See notes below. Authors ------- * Thibault Dayris Code ---- .. code-block:: R # This script takes a deseq2 dataset object, performs # a DESeq2 wald test, and saves results as requested by user # __author__ = "Thibault Dayris" # __copyright__ = "Copyright 2023, Thibault Dayris" # __email__ = "thibault.dayris@gustaveroussy.fr" # __license__ = "MIT" # Sink the stderr and stdout to the snakemake log file # https://stackoverflow.com/a/48173272 log_file <- base::file(description = snakemake@log[[1]], open = "wt") base::sink(file = log_file) base::sink(file = log_file, type = "message") # Loading libraries (order matters) base::library(package = "BiocParallel", character.only = TRUE) base::library(package = "SummarizedExperiment", character.only = TRUE) base::library(package = "DESeq2", character.only = TRUE) base::library(package = "ashr", character.only = TRUE) # Function to handle optional user-defined parameters # and still follow R syntax add_extra <- function(wrapper_extra, snakemake_param_name) { if (snakemake_param_name %in% base::names(snakemake@params)) { # Case user provides snakemake_param_name in snakemake rule user_param <- snakemake@params[[snakemake_param_name]] param_is_empty <- user_param == "" param_is_character <- inherits(x = user_param, what = "charcter") if ((! param_is_empty) && (param_is_character)) { # Case user do not provide an empty string # (R does not like trailing commas at the end # of a function call) wrapper_extra <- base::paste( wrapper_extra, user_param, sep = ", " ) } # Nothing to do if user provides an empty / NULL parameter value } # Nothing to do if user did not provide snakemake_param_name # In any case, required parameters must be returned base::return(wrapper_extra) } # Setting up multithreading if required parallel <- FALSE if (snakemake@threads > 1) { BiocParallel::register( BPPARAM = BiocParallel::MulticoreParam(snakemake@threads) ) parallel <- TRUE } # Load DESeq2 dataset dds_path <- base::as.character(x = snakemake@input[["dds"]]) dds <- base::readRDS(file = dds_path) base::message("Libraries and dataset loaded") # Build extra parameters for DESeq2 extra_deseq2 <- add_extra( wrapper_extra = "object = dds, test = 'Wald', parallel = parallel", snakemake_param_name = "deseq_extra" ) deseq2_cmd <- base::paste0( "DESeq2::DESeq(", extra_deseq2, ")" ) base::message("DESeq2 command line:") base::message(deseq2_cmd) # Running DESeq2::DESeq for wald test result wald <- base::eval(base::parse(text = deseq2_cmd)) # The rest of the script is here to save part or complete # list of results in RDS or plain text (TSV) formats. # Save main result on user request (RDS) # This includes counts, wald tests for all levels # assays, design, etc. if ("wald_rds" %in% base::names(x = snakemake@output)) { output_rds <- base::as.character(x = snakemake@output[["wald_rds"]]) base::saveRDS(obj = wald, file = output_rds) base::message("Wald test saved as RDS file") } # Saving normalized counts on demand table <- counts(wald) # TSV-formatted count table if ("normalized_counts_table" %in% base::names(snakemake@output)) { output_table <- base::as.character( x = snakemake@output[["normalized_counts_table"]] ) utils::write.table(x = table, file = output_table, sep = "\t", quote = FALSE) base::message("Normalized counts saved as TSV") } # RDS-formated count object with many information, # including counts, assays, etc. if ("normalized_counts_rds" %in% base::names(snakemake@output)) { output_rds <- base::as.character( x = snakemake@output[["normalized_counts_rds"]] ) base::saveRDS(obj = table, file = output_rds) base::message("Normalized counts saved as RDS") } # On user request: save all results as TSV in a directory. # User can later access the directory content, e.g. with # a snakemake checkpoint-rule. if ("deseq2_result_dir" %in% base::names(snakemake@output)) { # Acquire list of available results in DESeqDataSet wald_results_names <- DESeq2::resultsNames(object = wald) # Recovering extra parameters for TSV tables # The variable `result_name` is built below in `for` loop. results_extra <- add_extra( wrapper_extra = "object = wald, name = result_name, parallel = parallel", snakemake_param_name = "results_extra" ) # DESeq2 result dir will contain all results available in the Wald object output_prefix <- snakemake@output[["deseq2_result_dir"]] if (! base::file.exists(output_prefix)) { base::dir.create(path = output_prefix, recursive = TRUE) } # Building command lines for both wald results and fc schinkage results_cmd <- base::paste0("DESeq2::results(", results_extra, ")") base::message("Command line used for TSV results creation:") base::message(results_cmd) shrink_extra <- add_extra( "dds = wald, res = results_frame, contrast = contrast, parallel = parallel, type = 'ashr'", "shrink_extra" ) shrink_cmd <- base::paste0("DESeq2::lfcShrink(", shrink_extra, ")") base::message("Command line used for log(FC) shrinkage:") base::message(shrink_cmd) # For each available comparison in the wald-dds object for (result_name in wald_results_names) { # Building table base::message(base::paste("Saving results for", result_name)) results_frame <- base::eval(base::parse(text = results_cmd)) shrink_frame <- base::eval(base::parse(text = shrink_cmd)) results_frame$log2FoldChange <- shrink_frame$log2FoldChange results_path <- base::file.path( output_prefix, base::paste0(result_name, ".tsv") ) # Saving table utils::write.table( x = results_frame, file = results_path, quote = FALSE, sep = "\t", row.names = TRUE ) } } # If user provides contrasts, then a precise result # can be extracted from DESeq2 object. if ("wald_tsv" %in% base::names(x = snakemake@output)) { if ("contrast" %in% base::names(x = snakemake@params)) { contrast_length <- base::length(x = snakemake@params[["contrast"]]) results_extra <- "object=wald, parallel = parallel" contrast <- NULL if (contrast_length == 1) { # Case user provided a result name in the `contrast` parameter contrast <- base::as.character(x = snakemake@params[["contrast"]]) contrast <- base::paste0("name='", contrast[1], "'") } else if (contrast_length == 2) { # Case user provided both tested and reference level # In that order! Order matters. contrast <- sapply( snakemake@params[["contrast"]], function(extra) base::as.character(x = extra) ) contrast <- base::paste0( "contrast=list('", contrast[1], "', '", contrast[2], "')" ) } else if (contrast_length == 3) { # Case user provided both tested and reference level, # and studied factor. contrast <- sapply( snakemake@params[["contrast"]], function(extra) base::as.character(x = extra) ) contrast <- base::paste0( "contrast=c('", contrast[1], "', '", contrast[2], "', '", contrast[3], "')" ) # Finally saving results as contrast has been # built from user input. results_extra <- base::paste(results_extra, contrast, sep = ", ") results_cmd <- base::paste0("DESeq2::results(", results_extra, ")") base::message("Result extraction command: ", results_cmd) shrink_extra <- add_extra( "dds = wald, res = results_frame, contrast = contrast[1], parallel = parallel, type = 'ashr'", "shrink_extra" ) shrink_cmd <- base::paste0("DESeq2::lfcShrink(", shrink_extra, ")") base::message("Command line used for log(FC) shrinkage:") base::message(shrink_cmd) results_frame <- base::eval(base::parse(text = results_cmd)) shrink_frame <- base::eval(base::parse(text = shrink_cmd)) results_frame$log2FoldChange <- shrink_frame$log2FoldChange # Saving table utils::write.table( x = results_frame, file = base::as.character(x = snakemake@output[["wald_tsv"]]), quote = FALSE, sep = "\t", row.names = TRUE ) } } else { base::stop( "No contrast provided. ", "In absence of contrast, it is not possible ", "to guess the expected result name.", ) } } # Proper syntax to close the connection for the log file # but could be optional for Snakemake wrapper base::sink(type = "message") base::sink() .. |nl| raw:: html