MERQURY¶
Evaluate genome assemblies with k-mers and more.
URL: https://github.com/marbl/merqury
Example¶
This wrapper can be used in the following way:
rule run_merqury_haploid:
input:
fasta="hap1.fasta",
meryldb="meryldb",
output:
# meryldb output
filt="results/haploid/meryldb.filt",
hist="results/haploid/meryldb.hist",
hist_ploidy="results/haploid/meryldb.hist.ploidy",
# general output
completeness_stats="results/haploid/out.completeness.stats",
dist_only_hist="results/haploid/out.dist.only.hist",
qv="results/haploid/out.qv",
spectra_asm_hist="results/haploid/out.spectra_asm.hist",
spectra_asm_ln_png="results/haploid/out.spectra_asm.png",
# haplotype-specific output
fas1_only_bed="results/haploid/hap1.bed",
fas1_only_wig="results/haploid/hap1.wig",
fas1_only_hist="results/haploid/hap1.hist",
fas1_qv="results/haploid/hap1.qv",
fas1_spectra_cn_hist="results/haploid/hap1.spectra.hist",
fas1_spectra_cn_ln_png="results/haploid/hap1.spectra.png",
log:
std="logs/haploid.log",
spectra_cn="logs/haploid.spectra-cn.log",
threads: 1
wrapper:
"v1.31.1-39-gb5b9878a/bio/merqury"
rule run_merqury_diploid:
input:
fasta=["hap1.fasta", "hap2.fasta"],
meryldb="meryldb",
output:
# meryldb output
filt="results/diploid/meryldb.filt",
hist="results/diploid/meryldb.hist",
hist_ploidy="results/diploid/meryldb.hist.ploidy",
# general output
completeness_stats="results/diploid/out.completeness.stats",
dist_only_hist="results/diploid/out.dist.only.hist",
only_hist="results/diploid/out.only.hist",
qv="results/diploid/out.qv",
spectra_asm_hist="results/diploid/out.spectra_asm.hist",
spectra_asm_ln_png="results/diploid/out.spectra_asm.png",
spectra_cn_hist="results/diploid/out.spectra_cn.hist",
spectra_cn_ln_png="results/diploid/out.spectra_cn.png",
# haplotype-specific output
fas1_only_bed="results/diploid/hap1.bed",
fas1_only_wig="results/diploid/hap1.wig",
fas1_only_hist="results/diploid/hap1.hist",
fas1_qv="results/diploid/hap1.qv",
fas1_spectra_cn_hist="results/diploid/hap1.spectra.hist",
fas1_spectra_cn_ln_png="results/diploid/hap1.spectra.png",
fas2_only_bed="results/diploid/hap2.bed",
fas2_only_wig="results/diploid/hap2.wig",
fas2_only_hist="results/diploid/hap2.hist",
fas2_qv="results/diploid/hap2.qv",
fas2_spectra_cn_hist="results/diploid/hap2.spectra.hist",
fas2_spectra_cn_ln_png="results/diploid/hap2.spectra.png",
log:
std="logs/diploid.log",
spectra_cn="logs/diploid.spectra-cn.log",
threads: 1
wrapper:
"v1.31.1-39-gb5b9878a/bio/merqury"
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Notes¶
- Merqury does not return non-zero exit code when it fails, so always include (at least) one PNG file in your rule’s output.
- Merqury does not allow for extra params.
Software dependencies¶
merqury=1.3
Input/Output¶
Input:
- one on two assembly fasta file(s)
- meryl database
Output:
- annotation quality files
Authors¶
- Filipe G. Vieira
Code¶
__author__ = "Filipe G. Vieira"
__copyright__ = "Copyright 2022, Filipe G. Vieira"
__license__ = "MIT"
import os
import tempfile
from pathlib import Path
from snakemake.shell import shell
meryldb_parents = snakemake.input.get("meryldb_parents", "")
out_prefix = "out"
log_tmp = "__LOG__.tmp"
def save_output(out_prefix, ext, cwd, file):
if file is None:
return 0
src = f"{out_prefix}{ext}"
dest = cwd / file
shell("cat {src} > {dest}")
with tempfile.TemporaryDirectory() as tmpdir:
cwd = Path.cwd()
# Create symlinks for input files
for input in snakemake.input:
src = Path(input)
dst = Path(tmpdir) / input
src = Path(os.path.relpath(src.resolve(), dst.resolve().parent))
dst.symlink_to(src)
os.chdir(tmpdir)
shell(
"merqury.sh"
" {snakemake.input.meryldb}"
" {meryldb_parents}"
" {snakemake.input.fasta}"
" {out_prefix}"
" > {log_tmp} 2>&1"
)
### Saving LOG files
save_output(log_tmp, "", cwd, snakemake.log.get("std"))
for type in ["spectra_cn"]:
save_output(
f"logs/{out_prefix}",
"." + type.replace("_", "-") + ".log",
cwd,
snakemake.log.get(type),
)
### Saving OUTPUT files
# EXT: replace all "_" with "."
meryldb = Path(snakemake.input.meryldb.rstrip("/")).stem
for type in ["filt", "hist", "hist_ploidy"]:
save_output(
meryldb, "." + type.replace("_", "."), cwd, snakemake.output.get(type)
)
# EXT: replace last "_" with "."
for type in [
"completeness_stats",
"dist_only_hist",
"only_hist",
"qv",
"hapmers_count",
"hapmers_blob_png",
]:
save_output(
out_prefix,
"." + type[::-1].replace("_", ".", 1)[::-1],
cwd,
snakemake.output.get(type),
)
# EXT: replace first "_" with "-", and remaining with "."
for type in [
"spectra_asm_hist",
"spectra_asm_ln_png",
"spectra_asm_fl_png",
"spectra_asm_st_png",
"spectra_cn_hist",
"spectra_cn_ln_png",
"spectra_cn_fl_png",
"spectra_cn_st_png",
]:
save_output(
out_prefix,
"." + type.replace("_", ".").replace(".", "-", 1),
cwd,
snakemake.output.get(type),
)
input_fas = snakemake.input.fasta
if isinstance(input_fas, str):
input_fas = [input_fas]
for fas in range(1, len(input_fas) + 1):
prefix = Path(input_fas[fas - 1]).name.removesuffix(".fasta")
# EXT: remove everything until first "_" and replace last "_" with "."
for type in [f"fas{fas}_only_bed", f"fas{fas}_only_wig"]:
save_output(
prefix,
type[type.find("_") :][::-1].replace("_", ".", 1)[::-1],
cwd,
snakemake.output.get(type),
)
# EXT: remove everything until first "_" and replace all "_" with "."
for type in [f"fas{fas}_only_hist", f"fas{fas}_qv"]:
save_output(
f"{out_prefix}.{prefix}",
type[type.find("_") :].replace("_", "."),
cwd,
snakemake.output.get(type),
)
# EXT: remove everything until first "_", replace first "_" with "-", and remaining with "."
for type in [
f"fas{fas}_spectra_cn_hist",
f"fas{fas}_spectra_cn_ln_png",
f"fas{fas}_spectra_cn_fl_png",
f"fas{fas}_spectra_cn_st_png",
]:
save_output(
f"{out_prefix}.{prefix}",
"." + type[type.find("_") + 1 :].replace("_", ".").replace(".", "-", 1),
cwd,
snakemake.output.get(type),
)