NONPAREIL PLOT
Plot Nonpareil results.
URL: https://nonpareil.readthedocs.io/en/latest/
Example
This wrapper can be used in the following way:
rule test_nonpareil_plot:
input:
npo="{sample}.npo",
output:
pdf="results/{sample}.pdf",
model="results/{sample}.RData",
json="results/{sample}.json",
threads: 1
log:
"logs/{sample}.log",
params:
labels=lambda w: w.sample,
col="blue",
enforce_consistency=True,
star=95,
correction_factor=True,
weights_exp=[-1.1,-1.2,-0.9,-1.3,-1],
skip_model=False,
plot_observed=True,
plot_model=True,
plot_dispersion="ci95",
plot_diversity=False,
wrapper:
"v3.9.0/bio/nonpareil/plot"
use rule test_nonpareil_plot as test_nonpareil_plot_multiple with:
input:
npo=["a.npo", "b.npo", "c.npo"],
output:
pdf="results/samples.pdf",
model="results/samples.RData",
json="results/samples.json",
log:
"logs/samples.log",
params:
labels=["Model A","Model B", "Model C"],
col=["blue","red"],
enforce_consistency=True,
star=95,
correction_factor=True,
plot_observed=True,
plot_model=True,
plot_dispersion="sd",
plot_diversity=True,
use rule test_nonpareil_plot as test_nonpareil_plot_nomodel with:
output:
pdf="results/{sample}.nomodel.pdf",
model="results/{sample}.nomodel.RData",
json="results/{sample}.nomodel.json",
log:
"logs/{sample}.nomodel.log",
params:
labels=lambda w: w.sample,
col="blue",
enforce_consistency=True,
star=95,
correction_factor=True,
weights_exp=[-1.1,-1.2,-0.9,-1.3,-1],
skip_model=True,
plot_observed=True,
plot_model=True,
plot_dispersion="iq",
plot_diversity=True
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Software dependencies
nonpareil=3.4.1
r-jsonlite
Input/Output
Input:
NPO file
Output:
PDF file with plot
Params
labels
: Curve labels.col
: Curve colors.enforce_consistency
: Fails verbosely on insufficient data, otherwise it warns about the inconsistencies and attempts the estimations.star
: Objective coverage in percentage (i.e., coverage value considered near-complete).correction_factor
: Apply overlap-dependent correction factor, otherwise redundancy is assumed to equal coverage.weights_exp
: Vector of values to be tested as exponent of the weights distribution.skip_model
: Skip model estimation.plot_observed
: Plot observed values?plot_model
: Plot infered model?plot_dispersion
: Plot dispersion around curve (one of sd, ci95, ci90, ci50, or iq)?plot_diversity
: Plot diversity (bottom arrow)?
Code
# __author__ = "Filipe G. Vieira"
# __copyright__ = "Copyright 2023, Filipe G. Vieira"
# __license__ = "MIT"
# This script plots results (NPO file) from NonPareil
# Sink the stderr and stdout to the snakemake log file
# https://stackoverflow.com/a/48173272
log.file <- file(snakemake@log[[1]], open = "wt")
base::sink(log.file)
base::sink(log.file, type = "message")
# Loading libraries (order matters)
base::library(package = "Nonpareil", character.only = TRUE)
base::message("Libraries loaded")
# Set input and output files
in_files = snakemake@input[["npo"]]
out_pdf = snakemake@output[[1]]
base::message("Input files: ")
base::print(in_files)
# Set parameters
params <- list("in_files" = in_files,
"out_pdf" = out_pdf,
"labels" = NA,
"col" = NA,
"enforce_consistency" = ifelse("enforce_consistency" %in% base::names(snakemake@params), as.logical(snakemake@params[["enforce_consistency"]]), FALSE),
"star" = ifelse("star" %in% base::names(snakemake@params), snakemake@params[["star"]], 95),
"correction_factor" = ifelse("correction_factor" %in% base::names(snakemake@params), as.logical(snakemake@params[["correction_factor"]]), FALSE),
"weights_exp" = NA,
"skip_model" = ifelse("skip_model" %in% base::names(snakemake@params), as.logical(snakemake@params[["skip_model"]]), FALSE),
"plot_observed" = ifelse("plot_observed" %in% base::names(snakemake@params), as.logical(snakemake@params[["plot_observed"]]), TRUE),
"plot_model" = ifelse("plot_model" %in% base::names(snakemake@params), as.logical(snakemake@params[["plot_model"]]), TRUE),
"plot_dispersion" = ifelse("plot_dispersion" %in% base::names(snakemake@params), snakemake@params[["plot_dispersion"]], FALSE),
"plot_diversity" = ifelse("plot_diversity" %in% base::names(snakemake@params), as.logical(snakemake@params[["plot_diversity"]]), FALSE)
)
# Not sure why, by using "ifelse" only keeps the first element of the vector
if ("labels" %in% base::names(snakemake@params)) {
params[["labels"]] = snakemake@params[["labels"]]
}
if ("col" %in% base::names(snakemake@params)) {
params[["col"]] = snakemake@params[["col"]]
}
if ("weights_exp" %in% base::names(snakemake@params)) {
params[["weights_exp"]] = snakemake@params[["weights_exp"]]
}
base::message("Options provided:")
utils::str(params)
##################
### Save plots ###
##################
pdf(out_pdf)
curves <- Nonpareil.set(in_files,
labels = params[["labels"]],
col = params[["col"]],
enforce.consistency = params[["enforce_consistency"]],
star = params[["star"]],
correction.factor = params[["correction_factor"]],
weights.exp = params[["weights_exp"]],
skip.model = params[["skip_model"]],
plot = FALSE
)
lapply(curves$np.curves, plot,
col = params[["col"]],
plot.observed = params[["plot_observed"]],
plot.model = params[["plot_model"]],
plot.dispersion = params[["plot_dispersion"]],
plot.diversity = params[["plot_diversity"]]
)
dev.off()
base::message("Nonpareil plot saved")
##################
### Save stats ###
##################
stats <- summary(curves)
# Fix names
colnames(stats) <- c("Redundancy", "Avg. coverage", "Seq. effort", "Model correlation", "Required seq. effort", "Diversity")
# Print stats to log
base::print(stats)
##################
### Save model ###
##################
if ("model" %in% base::names(snakemake@output)) {
save(curves, file=snakemake@output[["model"]])
}
#################
### Save JSON ###
#################
if ("json" %in% base::names(snakemake@output)) {
base::library("jsonlite")
base::message("Exporting model as JSON")
export_curve <- function(object){
# Extract variables
n <- names(attributes(object))[c(1:12,21:29)]
x <- sapply(n, function(v) attr(object,v))
names(x) <- n
# Extract vectors
n <- names(attributes(object))[13:20]
y <- lapply(n, function(v) attr(object,v))
names(y) <- n
curve_json <- c(x, y)
# Add model
if (object$has.model) {
# https://github.com/lmrodriguezr/nonpareil/blob/162f1697ab1a21128e1857dd87fa93011e30c1ba/utils/Nonpareil/R/Nonpareil.R#L330-L332
x_min <- 1e3
x_max <- signif(tail(attr(object,"x.adj"), n=1)*10, 1)
x.model <- exp(seq(log(x_min), log(x_max), length.out=1e3))
y.model <- predict(object, lr=x.model)
curve_json <- append(curve_json, list(x.model=x.model))
curve_json <- append(curve_json, list(y.model=y.model))
}
base::print(curve_json)
curve_json
}
export_set <- function(object){
y <- lapply(object$np.curves, "export_curve")
names(y) <- sapply(object$np.curves, function(n) n$label)
jsonlite::prettify(toJSON(y, auto_unbox=TRUE))
}
y <- export_set(curves)
write(y, snakemake@output[["json"]])
base::message("JSON file saved")
}
# Proper syntax to close the connection for the log file
# but could be optional for Snakemake wrapper
base::sink(type = "message")
base::sink()