.. _`bio/merqury`: MERQURY ======= .. image:: https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/bio/merqury?label=version%20update%20pull%20requests :target: https://github.com/snakemake/snakemake-wrappers/pulls?q=is%3Apr+is%3Aopen+label%3Abio/merqury Evaluate genome assemblies with k-mers and more. **URL**: https://github.com/marbl/merqury Example ------- This wrapper can be used in the following way: .. code-block:: python rule run_merqury_haploid: input: fasta="hap1.fasta", meryldb="meryldb", output: # meryldb output filt="results/haploid/meryldb.filt", hist="results/haploid/meryldb.hist", hist_ploidy="results/haploid/meryldb.hist.ploidy", # general output completeness_stats="results/haploid/out.completeness.stats", dist_only_hist="results/haploid/out.dist.only.hist", qv="results/haploid/out.qv", spectra_asm_hist="results/haploid/out.spectra_asm.hist", spectra_asm_ln_png="results/haploid/out.spectra_asm.png", # haplotype-specific output fas1_only_bed="results/haploid/hap1.bed", fas1_only_wig="results/haploid/hap1.wig", fas1_only_hist="results/haploid/hap1.hist", fas1_qv="results/haploid/hap1.qv", fas1_spectra_cn_hist="results/haploid/hap1.spectra.hist", fas1_spectra_cn_ln_png="results/haploid/hap1.spectra.png", log: std="logs/haploid.log", spectra_cn="logs/haploid.spectra-cn.log", threads: 1 wrapper: "v3.0.1/bio/merqury" rule run_merqury_diploid: input: fasta=["hap1.fasta", "hap2.fasta"], meryldb="meryldb", output: # meryldb output filt="results/diploid/meryldb.filt", hist="results/diploid/meryldb.hist", hist_ploidy="results/diploid/meryldb.hist.ploidy", # general output completeness_stats="results/diploid/out.completeness.stats", dist_only_hist="results/diploid/out.dist.only.hist", only_hist="results/diploid/out.only.hist", qv="results/diploid/out.qv", spectra_asm_hist="results/diploid/out.spectra_asm.hist", spectra_asm_ln_png="results/diploid/out.spectra_asm.png", spectra_cn_hist="results/diploid/out.spectra_cn.hist", spectra_cn_ln_png="results/diploid/out.spectra_cn.png", # haplotype-specific output fas1_only_bed="results/diploid/hap1.bed", fas1_only_wig="results/diploid/hap1.wig", fas1_only_hist="results/diploid/hap1.hist", fas1_qv="results/diploid/hap1.qv", fas1_spectra_cn_hist="results/diploid/hap1.spectra.hist", fas1_spectra_cn_ln_png="results/diploid/hap1.spectra.png", fas2_only_bed="results/diploid/hap2.bed", fas2_only_wig="results/diploid/hap2.wig", fas2_only_hist="results/diploid/hap2.hist", fas2_qv="results/diploid/hap2.qv", fas2_spectra_cn_hist="results/diploid/hap2.spectra.hist", fas2_spectra_cn_ln_png="results/diploid/hap2.spectra.png", log: std="logs/diploid.log", spectra_cn="logs/diploid.spectra-cn.log", threads: 1 wrapper: "v3.0.1/bio/merqury" Note that input, output and log file paths can be chosen freely. When running with .. code-block:: bash snakemake --use-conda the software dependencies will be automatically deployed into an isolated environment before execution. Notes ----- * `Merqury` does not return non-zero exit code when it fails, so always include (at least) one PNG file in your rule's output. * `Merqury` does not allow for extra params. Software dependencies --------------------- * ``merqury=1.3`` Input/Output ------------ **Input:** * one on two assembly fasta file(s) * meryl database **Output:** * annotation quality files Authors ------- * Filipe G. Vieira Code ---- .. code-block:: python __author__ = "Filipe G. Vieira" __copyright__ = "Copyright 2022, Filipe G. Vieira" __license__ = "MIT" import os import tempfile from pathlib import Path from snakemake.shell import shell meryldb_parents = snakemake.input.get("meryldb_parents", "") out_prefix = "out" log_tmp = "__LOG__.tmp" def save_output(out_prefix, ext, cwd, file): if file is None: return 0 src = f"{out_prefix}{ext}" dest = cwd / file shell("cat {src} > {dest}") with tempfile.TemporaryDirectory() as tmpdir: cwd = Path.cwd() # Create symlinks for input files for input in snakemake.input: src = Path(input) dst = Path(tmpdir) / input src = Path(os.path.relpath(src.resolve(), dst.resolve().parent)) dst.symlink_to(src) os.chdir(tmpdir) shell( "merqury.sh" " {snakemake.input.meryldb}" " {meryldb_parents}" " {snakemake.input.fasta}" " {out_prefix}" " > {log_tmp} 2>&1" ) ### Saving LOG files save_output(log_tmp, "", cwd, snakemake.log.get("std")) for type in ["spectra_cn"]: save_output( f"logs/{out_prefix}", "." + type.replace("_", "-") + ".log", cwd, snakemake.log.get(type), ) ### Saving OUTPUT files # EXT: replace all "_" with "." meryldb = Path(snakemake.input.meryldb.rstrip("/")).stem for type in ["filt", "hist", "hist_ploidy"]: save_output( meryldb, "." + type.replace("_", "."), cwd, snakemake.output.get(type) ) # EXT: replace last "_" with "." for type in [ "completeness_stats", "dist_only_hist", "only_hist", "qv", "hapmers_count", "hapmers_blob_png", ]: save_output( out_prefix, "." + type[::-1].replace("_", ".", 1)[::-1], cwd, snakemake.output.get(type), ) # EXT: replace first "_" with "-", and remaining with "." for type in [ "spectra_asm_hist", "spectra_asm_ln_png", "spectra_asm_fl_png", "spectra_asm_st_png", "spectra_cn_hist", "spectra_cn_ln_png", "spectra_cn_fl_png", "spectra_cn_st_png", ]: save_output( out_prefix, "." + type.replace("_", ".").replace(".", "-", 1), cwd, snakemake.output.get(type), ) input_fas = snakemake.input.fasta if isinstance(input_fas, str): input_fas = [input_fas] for fas in range(1, len(input_fas) + 1): prefix = Path(input_fas[fas - 1]).name.removesuffix(".fasta") # EXT: remove everything until first "_" and replace last "_" with "." for type in [f"fas{fas}_only_bed", f"fas{fas}_only_wig"]: save_output( prefix, type[type.find("_") :][::-1].replace("_", ".", 1)[::-1], cwd, snakemake.output.get(type), ) # EXT: remove everything until first "_" and replace all "_" with "." for type in [f"fas{fas}_only_hist", f"fas{fas}_qv"]: save_output( f"{out_prefix}.{prefix}", type[type.find("_") :].replace("_", "."), cwd, snakemake.output.get(type), ) # EXT: remove everything until first "_", replace first "_" with "-", and remaining with "." for type in [ f"fas{fas}_spectra_cn_hist", f"fas{fas}_spectra_cn_ln_png", f"fas{fas}_spectra_cn_fl_png", f"fas{fas}_spectra_cn_st_png", ]: save_output( f"{out_prefix}.{prefix}", "." + type[type.find("_") + 1 :].replace("_", ".").replace(".", "-", 1), cwd, snakemake.output.get(type), ) .. |nl| raw:: html