TAXONKIT GENERIC WRAPPER

https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/bio/taxonkit?label=version%20update%20pull%20requests

Run TaxonKit.

URL: https://bioinf.shenwei.me/taxonkit/

Example

This wrapper can be used in the following way:

# Example: run `taxonkit list` and write the result to a text file.
rule taxonkit_list_txt:
    input:
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/list/{sample}.txt",
    log:
        "logs/list/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="list",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--ids 36846609 --indent '\t' --show-name --show-rank",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit list` with JSON output. The wrapper adds `--json`
# by itself because the output file name ends in "json".
rule taxonkit_list_json:
    input:
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/list/{sample}.json",
    log:
        # Distinct from the log of `taxonkit_list_txt`, so that both rules can
        # run for the same sample without overwriting each other's log.
        "logs/list/{sample}.json.log",
    params:
        # TaxonKit subcommand to run.
        command="list",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--ids 36846609 --show-name --show-rank",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit lineage` on a file given as positional input.
rule taxonkit_lineage:
    input:
        # Appended to the TaxonKit command line as positional argument.
        input="taxon_ids.txt",
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/lineage/{sample}.txt",
    log:
        "logs/lineage/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="lineage",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--show-status-code",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit reformat` on a file given as positional input.
rule taxonkit_reformat:
    input:
        # Appended to the TaxonKit command line as positional argument.
        input="taxon_ids.txt",
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/reformat/{sample}.txt",
    log:
        "logs/reformat/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="reformat",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--taxid-field 1",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit name2taxid` on a file given as positional input.
rule taxonkit_name2taxid:
    input:
        # Appended to the TaxonKit command line as positional argument.
        input="taxon_name.txt",
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/name2taxid/{sample}.txt",
    log:
        "logs/name2taxid/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="name2taxid",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--show-rank",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit filter` on a file given as positional input.
rule taxonkit_filter:
    input:
        # Appended to the TaxonKit command line as positional argument.
        input="taxon_ids.txt",
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/filter/{sample}.txt",
    log:
        "logs/filter/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="filter",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--equal-to species",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit lca` on a file given as positional input.
rule taxonkit_lca:
    input:
        # Appended to the TaxonKit command line as positional argument.
        input="taxon_ids.txt",
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/lca/{sample}.txt",
    log:
        "logs/lca/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="lca",
        # Extra arguments inserted into the TaxonKit command line; the inner
        # single quotes keep the comma as one shell word.
        extra="--separator ','",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit create-taxdump`. No input taxdump is given here;
# the taxdump files are the output of this rule.
rule taxonkit_create_taxdump:
    input:
        # Several files may be given; all are appended to the TaxonKit
        # command line as positional arguments.
        input=["lineages1.txt", "lineages2.txt"],
    output:
        # Naming the output `taxdump` makes the wrapper pass the parent
        # directory of the first entry as `--out-dir` (instead of `--out-file`).
        taxdump=multiext(
            "out/create-taxdump/{sample}/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    log:
        "logs/create-taxdump/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="create-taxdump",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--field-accession 1 --rank-names 'superkingdom,phylum,class,order,family,genus,species'",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit profile2cami` on a file given as positional input.
rule taxonkit_profile2cami:
    input:
        # Appended to the TaxonKit command line as positional argument.
        input="abundance.tsv",
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/profile2cami/{sample}.txt",
    log:
        "logs/profile2cami/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="profile2cami",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--sample-id sample1 --taxonomy-id 2021-10-01",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"


# Example: run `taxonkit cami-filter` on the result of `taxonkit_profile2cami`.
rule taxonkit_cami_filter:
    input:
        # First output file of the `taxonkit_profile2cami` rule; appended to
        # the TaxonKit command line as positional argument.
        input=rules.taxonkit_profile2cami.output[0],
        # Taxdump files; the wrapper passes the parent directory of the
        # first entry to TaxonKit as `--data-dir`.
        taxdump=multiext(
            "test-taxdump/",
            "taxid.map",
            "nodes.dmp",
            "names.dmp",
            "merged.dmp",
            "delnodes.dmp",
        ),
    output:
        # Handed to TaxonKit through `--out-file`.
        "out/cami_filter/{sample}.tsv",
    log:
        "logs/cami_filter/{sample}.log",
    params:
        # TaxonKit subcommand to run.
        command="cami-filter",
        # Extra arguments inserted into the TaxonKit command line.
        extra="--taxids 2759",
    threads: 2
    wrapper:
        "v3.9.0/bio/taxonkit"

Note that input, output and log file paths can be chosen freely.

When running with

snakemake --use-conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Software dependencies

  • taxonkit=0.16.0

Input/Output

Input:

  • input: input file(s)

  • taxdump: taxdump files

Output:

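  • taxdump: output taxdump files (their parent directory is passed to TaxonKit as --out-dir); if no taxdump output is given, the output file is passed as --out-file instead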

Params

  • command: TaxonKit command to use.

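  • extra: Optional additional command-line arguments for the chosen command. Do not set --threads, --data-dir, --out-dir or --out-file here: the wrapper adds them itself, and it also adds --json when the output file name ends in "json".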

Authors

  • Filipe G. Vieira

Code

__author__ = "Filipe G. Vieira"
__copyright__ = "Copyright 2024, Filipe G. Vieira"
__license__ = "MIT"


from pathlib import Path
from snakemake.shell import shell


def _parent_dir(files):
    """Return the directory that contains *files*.

    *files* may be a single path or a sequence of paths; for a sequence the
    first entry is used. A single named input/output file is expected to
    arrive as a plain string, and indexing a string would yield only its
    first character, so the two cases have to be told apart.
    """
    first = files if isinstance(files, str) else files[0]
    return Path(first).parent


# Optional user-supplied arguments for the TaxonKit subcommand.
extra = snakemake.params.get("extra", "")
# Shell redirection that sends stdout and stderr to the rule's log file.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


# Positional input file(s); stays empty for commands that do not take any.
# Deliberately not called `input`, which would shadow the builtin.
in_files = snakemake.input.get("input", "")

# TaxonKit reads the taxdump from a directory, not from individual files.
in_taxdump = snakemake.input.get("taxdump", "")
if in_taxdump:
    in_taxdump = f"--data-dir {_parent_dir(in_taxdump)}"


out_taxdump = snakemake.output.get("taxdump", "")
if out_taxdump:
    # Commands producing a taxdump write into a directory.
    extra += f" --out-dir {_parent_dir(out_taxdump)}"
else:
    # All other commands write a single file; use the first output for both
    # the JSON check and `--out-file`, so that additional outputs cannot leak
    # into the command line as positional arguments.
    out_file = snakemake.output[0]
    if out_file.endswith("json"):
        extra += " --json"
    extra += f" --out-file {out_file}"


shell(
    "taxonkit {snakemake.params.command}"
    " --threads {snakemake.threads}"
    " {in_taxdump}"
    " {extra}"
    " {in_files}"
    " {log}"
)