.. _`bio/nonpareil/plot`: NONPAREIL PLOT ============== .. image:: https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/bio/nonpareil/plot?label=version%20update%20pull%20requests :target: https://github.com/snakemake/snakemake-wrappers/pulls?q=is%3Apr+is%3Aopen+label%3Abio/nonpareil/plot Plot Nonpareil results. **URL**: https://nonpareil.readthedocs.io/en/latest/ Example ------- This wrapper can be used in the following way: .. code-block:: python rule test_nonpareil_plot: input: npo="{sample}.npo", output: pdf="results/{sample}.pdf", model="results/{sample}.RData", json="results/{sample}.json", threads: 1 log: "logs/{sample}.log", params: labels=lambda w: w.sample, col="blue", enforce_consistency=True, star=95, correction_factor=True, weights_exp=[-1.1,-1.2,-0.9,-1.3,-1], skip_model=False, plot_observed=True, plot_model=True, plot_dispersion="ci95", plot_diversity=False, wrapper: "v3.3.3/bio/nonpareil/plot" use rule test_nonpareil_plot as test_nonpareil_plot_multiple with: input: npo=["a.npo", "b.npo"], output: pdf="results/samples.pdf", model="results/samples.RData", json="results/samples.json", log: "logs/samples.log", params: labels=["Model A","Model B"], col=["blue","red"], enforce_consistency=True, star=95, correction_factor=True, plot_observed=True, plot_model=True, plot_dispersion="sd", plot_diversity=True, use rule test_nonpareil_plot as test_nonpareil_plot_nomodel with: output: pdf="results/{sample}.nomodel.pdf", model="results/{sample}.nomodel.RData", json="results/{sample}.nomodel.json", log: "logs/{sample}.nomodel.log", params: labels=lambda w: w.sample, col="blue", enforce_consistency=True, star=95, correction_factor=True, weights_exp=[-1.1,-1.2,-0.9,-1.3,-1], skip_model=True, plot_observed=True, plot_model=True, plot_dispersion="iq", plot_diversity=True Note that input, output and log file paths can be chosen freely. When running with .. code-block:: bash snakemake --use-conda the software dependencies will be automatically deployed into an isolated environment before execution. Software dependencies --------------------- * ``nonpareil=3.4.1`` * ``r-jsonlite`` Input/Output ------------ **Input:** * NPO file **Output:** * PDF file with plot Params ------ * ``labels``: Curve labels. * ``col``: Curve colors. * ``enforce_consistency``: Fails verbosely on insufficient data, otherwise it warns about the inconsistencies and attempts the estimations. * ``star``: Objective coverage in percentage (i.e., coverage value considered near-complete). * ``correction_factor``: Apply overlap-dependent correction factor, otherwise redundancy is assumed to equal coverage. * ``weights_exp``: Vector of values to be tested as exponent of the weights distribution. * ``skip_model``: Skip model estimation. * ``plot_observed``: Plot observed values? * ``plot_model``: Plot infered model? * ``plot_dispersion``: Plot dispersion around curve (one of `sd`, `ci95`, `ci90`, `ci50`, or `iq`)? * ``plot_diversity``: Plot diversity (bottom arrow)? Authors ------- * Filipe G. Vieira Code ---- .. code-block:: R # __author__ = "Filipe G. Vieira" # __copyright__ = "Copyright 2023, Filipe G. Vieira" # __license__ = "MIT" # This script plots results (NPO file) from NonPareil # Sink the stderr and stdout to the snakemake log file # https://stackoverflow.com/a/48173272 log.file <- file(snakemake@log[[1]], open = "wt") base::sink(log.file) base::sink(log.file, type = "message") # Loading libraries (order matters) base::library(package = "Nonpareil", character.only = TRUE) base::message("Libraries loaded") # Set input and output files in_files = snakemake@input[["npo"]] out_pdf = snakemake@output[[1]] base::message("Input files: ") base::print(in_files) base::message("Saving plot to file: ") base::print(out_pdf) # Set parameters params <- list("in_files" = in_files, "out_pdf" = out_pdf, "labels" = NA, "col" = NA, "enforce_consistency" = ifelse("enforce_consistency" %in% base::names(snakemake@params), as.logical(snakemake@params[["enforce_consistency"]]), FALSE), "star" = ifelse("star" %in% base::names(snakemake@params), snakemake@params[["star"]], 95), "correction_factor" = ifelse("correction_factor" %in% base::names(snakemake@params), as.logical(snakemake@params[["correction_factor"]]), FALSE), "weights_exp" = NA, "skip_model" = ifelse("skip_model" %in% base::names(snakemake@params), as.logical(snakemake@params[["skip_model"]]), FALSE), "plot_observed" = ifelse("plot_observed" %in% base::names(snakemake@params), as.logical(snakemake@params[["plot_observed"]]), TRUE), "plot_model" = ifelse("plot_model" %in% base::names(snakemake@params), as.logical(snakemake@params[["plot_model"]]), TRUE), "plot_dispersion" = ifelse("plot_dispersion" %in% base::names(snakemake@params), snakemake@params[["plot_dispersion"]], FALSE), "plot_diversity" = ifelse("plot_diversity" %in% base::names(snakemake@params), as.logical(snakemake@params[["plot_diversity"]]), FALSE) ) # Not sure why, by using "ifelse" only keeps the first element of the vector if ("labels" %in% base::names(snakemake@params)) { params[["labels"]] = snakemake@params[["labels"]] } if ("col" %in% base::names(snakemake@params)) { params[["col"]] = snakemake@params[["col"]] } if ("weights_exp" %in% base::names(snakemake@params)) { params[["weights_exp"]] = snakemake@params[["weights_exp"]] } base::message("Options provided:") utils::str(params) ################## ### Save plots ### ################## pdf(out_pdf) curves <- Nonpareil.set(in_files, labels = params[["labels"]], col = params[["col"]], enforce.consistency = params[["enforce_consistency"]], star = params[["star"]], correction.factor = params[["correction_factor"]], weights.exp = params[["weights_exp"]], skip.model = params[["skip_model"]], plot = FALSE ) lapply(curves$np.curves, plot, col = params[["col"]], plot.observed = params[["plot_observed"]], plot.model = params[["plot_model"]], plot.dispersion = params[["plot_dispersion"]], plot.diversity = params[["plot_diversity"]] ) dev.off() base::message("Nonpareil plot saved") ################## ### Save stats ### ################## stats <- summary(curves) # Fix names colnames(stats) <- c("Redundancy", "Avg. coverage", "Seq. effort", "Model correlation", "Required seq. effort", "Diversity") # If model not infered, set its values to NA stats[,4] <- sapply(stats[,4], function(x){if(length(x) == 0){NA} else {x}}) stats[,5] <- sapply(stats[,5], function(x){if(length(x) == 0){NA} else {x}}) # Convert to Gb stats[,3] <- stats[,3] / 1e9 stats[,5] <- stats[,5] / 1e9 # Round stats <- round(stats, digits = 2) # Print stats to log base::print(stats) ################## ### Save model ### ################## if ("model" %in% base::names(snakemake@output)) { save(curves, file=snakemake@output[["model"]]) } ################# ### Save JSON ### ################# if ("json" %in% base::names(snakemake@output)) { base::library("jsonlite") base::message("Exporting model as JSON") export_curve <- function(object){ # Extract variables n <- names(attributes(object))[c(1:12,21:29)] x <- sapply(n, function(v) attr(object,v)) names(x) <- n # Extract vectors n <- names(attributes(object))[13:20] y <- lapply(n, function(v) attr(object,v)) names(y) <- n # Add model # https://github.com/lmrodriguezr/nonpareil/blob/162f1697ab1a21128e1857dd87fa93011e30c1ba/utils/Nonpareil/R/Nonpareil.R#L330-L332 x_min <- 1e3 x_max <- signif(tail(attr(curves$np.curves[[1]],"x.adj"), n=1)*10, 1) x.model <- exp(seq(log(x_min), log(x_max), length.out=1e3)) y.model <- predict(object, lr=x.model) c(x, y, list(x.model=x.model), list(y.model=y.model)) } export_set <- function(object){ y <- lapply(object$np.curves, "export_curve") names(y) <- sapply(object$np.curves, function(n) n$label) jsonlite::prettify(toJSON(y, auto_unbox=TRUE)) } y <- export_set(curves) write(y, snakemake@output[["json"]]) base::message("JSON file saved") } # Proper syntax to close the connection for the log file # but could be optional for Snakemake wrapper base::sink(type = "message") base::sink() .. |nl| raw:: html