XSV

https://img.shields.io/github/issues-pr/snakemake/snakemake-wrappers/bio/xsv?label=version%20update%20pull%20requests

Perform various operations over CSV/TSV tables.

URL: https://github.com/BurntSushi/xsv

Example

This wrapper can be used in the following way:

### Concatenation subcommand ###
# Runs the wrapper with subcommand "cat rows" on two input tables.
# Both paths in `table` are passed to xsv after the subcommand, and the
# wrapper redirects xsv's standard output to the single output file.
rule test_xsv_cat_rows:
    input:
        table=["table.csv", "right.csv"],
    output:
        "xsv_catrows.csv",
    threads: 1
    log:
        "xsv/catrow.log",
    params:
        subcommand="cat rows",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


# Same inputs as the "cat rows" example, but with subcommand "cat columns".
# The two-word subcommand is passed to xsv verbatim.
rule test_xsv_cat_cols:
    input:
        table=["table.csv", "right.csv"],
    output:
        "xsv_catcols.csv",
    threads: 1
    log:
        "xsv/catcol.log",
    params:
        subcommand="cat columns",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Count subcommand ###
# Runs `xsv count` on a single CSV table; the wrapper writes xsv's standard
# output to the output file.
rule test_xsv_count:
    input:
        table="table.csv",
    output:
        "xsv_count.csv",
    threads: 1
    log:
        "xsv/count.log",
    params:
        subcommand="count",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


# Runs `xsv count` on a TSV table. No delimiter is given in `extra`: the
# wrapper is documented to add the tab delimiter itself for `.tsv` inputs.
# The log path is specific to this rule so that it cannot overwrite the log
# of another `count` rule run in the same workflow.
rule test_xsv_count_tsv_input:
    input:
        table="table.tsv",
    output:
        "xsv_count.tsv_as_input.csv",
    threads: 1
    log:
        "xsv/count_tsv_input.log",
    params:
        subcommand="count",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Fix lengths subcommand ###
# Runs `xsv fixlengths` with an explicit `--length 20` passed through `extra`.
rule test_xsv_fixlength:
    input:
        table="table.csv",
    output:
        "xsv_fixlength.csv",
    threads: 1
    log:
        "xsv/fixlength.log",
    params:
        subcommand="fixlengths",
        extra="--length 20",
    wrapper:
        "v2.1.1/bio/xsv"


### Flatten subcommand ###
# Runs `xsv flatten` on a single CSV table with no extra options.
rule test_xsv_flatten:
    input:
        table="table.csv",
    output:
        "xsv_flatten.csv",
    threads: 1
    log:
        "xsv/flatten.log",
    params:
        subcommand="flatten",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Format subcommand ###
# Runs `xsv fmt` on a single CSV table with no extra options.
# NOTE(review): the output is named `.tsv` but `extra` requests no output
# delimiter, and the wrapper only inspects *input* extensions; the file
# presumably still contains comma-separated data -- confirm this is intended.
rule test_xsv_fmt:
    input:
        table="table.csv",
    output:
        "xsv_fmt.tsv",
    threads: 1
    log:
        "xsv/fmt.log",
    params:
        subcommand="fmt",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Frequency subcommand ###
# Runs `xsv frequency` on a single CSV table.
# For this subcommand the wrapper appends `--jobs <threads>` on its own, so
# `threads` controls xsv's parallelism and need not be repeated in `extra`.
rule test_xsv_frequency:
    input:
        table="table.csv",
    output:
        "xsv_frequency.csv",
    threads: 1
    log:
        "xsv/frequency.log",
    params:
        subcommand="frequency",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Headers subcommand ###
# Runs `xsv headers` on a single CSV table; xsv's standard output is
# redirected to the output file by the wrapper.
rule test_xsv_headers:
    input:
        table="table.csv",
    output:
        "xsv_headers.csv",
    threads: 1
    log:
        "xsv/headers.log",
    params:
        subcommand="headers",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


# Runs `xsv headers --intersect` on two tables at once: both paths in `table`
# are passed to xsv, and the option is supplied through `extra`.
rule test_xsv_headers_list:
    input:
        table=["table.csv", "right.csv"],
    output:
        "xsv_headers_all.csv",
    threads: 1
    log:
        "xsv/headers_all.log",
    params:
        subcommand="headers",
        extra="--intersect",
    wrapper:
        "v2.1.1/bio/xsv"


### Index subcommand ###
# Runs `xsv index` on a single CSV table.
# For this subcommand the wrapper does not redirect standard output to the
# output file and does not pass the output path to xsv; both stdout and
# stderr go to the log instead.
# NOTE(review): the declared output must therefore match the path xsv picks
# by itself (presumably `<input>.idx`) -- confirm against the xsv docs before
# renaming it.
rule test_xsv_index:
    input:
        table="table.csv",
    output:
        "table.csv.idx",
    threads: 1
    log:
        "xsv/index.log",
    params:
        subcommand="index",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Input subcommand ###
# Runs `xsv input` on a single CSV table with no extra options.
rule test_xsv_input:
    input:
        table="table.csv",
    output:
        "xsv_input.csv",
    threads: 1
    log:
        "xsv/input.log",
    params:
        subcommand="input",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Join subcommand ###
# Runs `xsv join` on exactly two tables.
# The wrapper builds the command as
#   xsv join <extra> <col1> <table[0]> <col2> <table[1]> > <output>
# so `col1` is the join column of the first table and `col2` that of the
# second; both params are required for this subcommand.
rule test_xsv_join:
    input:
        table=["table.csv", "right.csv"],
    output:
        "xsv_join.csv",
    threads: 1
    log:
        "xsv/join.log",
    params:
        subcommand="join",
        col1="gene_id",
        col2="gene_id",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Sample subcommand ###
# Runs `xsv sample` on a single CSV table.
# `extra` is inserted between the subcommand and the input path, so it is
# also the place for positional arguments: here "1" is the first positional
# argument of `xsv sample`.
rule test_xsv_sample:
    input:
        table="table.csv",
    output:
        "xsv_sample.csv",
    threads: 1
    log:
        "xsv/sample.log",
    params:
        subcommand="sample",
        extra="1",
    wrapper:
        "v2.1.1/bio/xsv"


### Search subcommand ###
# Runs `xsv search` on a single CSV table. `extra` carries both the
# `--select gene_id` option and the positional pattern, which the wrapper
# places before the input path.
# NOTE(review): the pattern reaches the shell unquoted; `[0-9]` is also a
# shell glob, so quoting it inside `extra` would be safer -- confirm.
rule test_xsv_search:
    input:
        table="table.csv",
    output:
        "xsv_search.csv",
    threads: 1
    log:
        "xsv/search.log",
    params:
        subcommand="search",
        extra="--select gene_id ENSG[0-9]+",
    wrapper:
        "v2.1.1/bio/xsv"


### Select subcommand ###
# Runs `xsv select` on a single CSV table. The selection expression "3-" is
# a positional argument supplied through `extra`, which the wrapper inserts
# before the input path.
rule test_xsv_select:
    input:
        table="table.csv",
    output:
        "xsv_select.csv",
    threads: 1
    log:
        "xsv/select.log",
    params:
        subcommand="select",
        extra="3-",
    wrapper:
        "v2.1.1/bio/xsv"


### Slice subcommand ###
# Runs `xsv slice -i 2` on a single CSV table; the option is passed through
# `extra`.
rule test_xsv_slice:
    input:
        table="table.csv",
    output:
        "xsv_slice.csv",
    threads: 1
    log:
        "xsv/slice.log",
    params:
        subcommand="slice",
        extra="-i 2",
    wrapper:
        "v2.1.1/bio/xsv"


### Sort subcommand ###
# Runs `xsv sort` on a single CSV table with no extra options.
rule test_xsv_sort:
    input:
        table="table.csv",
    output:
        "xsv_sort.csv",
    threads: 1
    log:
        "xsv/sort.log",
    params:
        subcommand="sort",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Split subcommand ###
# Runs `xsv split -s 2` with a single directory() output.
# For this subcommand the wrapper passes the output path to xsv as the
# target directory (placed before the input table), appends
# `--jobs <threads>`, and sends both stdout and stderr to the log.
rule test_xsv_split:
    input:
        table="table.csv",
    output:
        directory("xsv_split"),
    threads: 1
    log:
        "xsv/split.log",
    params:
        subcommand="split",
        extra="-s 2",
    wrapper:
        "v2.1.1/bio/xsv"


# Runs `xsv split -s 1` with the expected chunk files listed explicitly.
# When more than one output is declared, the wrapper uses the parent
# directory of the first output as the xsv target directory, so every listed
# file must live in that same directory.
# The log path is specific to this rule so that it cannot overwrite the log
# of another `split` rule run in the same workflow.
# NOTE(review): these outputs sit inside `xsv_split/`, which
# `test_xsv_split` declares as a directory() output -- confirm the two rules
# are never requested in the same run.
rule test_xsv_split_list:
    input:
        table="table.csv",
    output:
        expand("xsv_split/{nb}.csv", nb=["0", "1"]),
    threads: 1
    log:
        "xsv/split_list.log",
    params:
        subcommand="split",
        extra="-s 1",
    wrapper:
        "v2.1.1/bio/xsv"


### Stats subcommand ###
# Runs `xsv stats` on a single CSV table.
# For this subcommand the wrapper appends `--jobs <threads>` on its own, so
# `threads` controls xsv's parallelism and need not be repeated in `extra`.
rule test_xsv_stats:
    input:
        table="table.csv",
    output:
        "xsv_stats.txt",
    threads: 1
    log:
        "xsv/stats.log",
    params:
        subcommand="stats",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"


### Table subcommand ###
# Runs `xsv table` on a single CSV table; xsv's standard output is redirected
# to a plain-text output file by the wrapper.
rule test_xsv_table:
    input:
        table="right.csv",
    output:
        "xsv_table.txt",
    threads: 1
    log:
        "xsv/table.log",
    params:
        subcommand="table",
        extra="",
    wrapper:
        "v2.1.1/bio/xsv"

Note that input, output and log file paths can be chosen freely.

When running with

snakemake --use-conda

the software dependencies will be automatically deployed into an isolated environment before execution.

Notes

Adding table(s) index(es) to the input file list makes many subcommands faster.

Software dependencies

  • xsv=0.13.0

Input/Output

Input:

  • table: Path to CSV/TSV table.

Output:

  • Path to the result file / directory

Params

  • extra: Optional arguments for xsv. For TSV files, --delimiter is automatically set to a tab character.
  • subcommand: xsv subcommand among cat (given as "cat rows" or "cat columns"), count, fixlengths, flatten, fmt, frequency, headers, index, input, join, sample, search, select, slice, sort, split, stats, or table

Authors

  • Thibault Dayris

Code

__author__ = "Thibault Dayris"
__copyright__ = "Copyright 2023, Thibault Dayris"
__email__ = "thibault.dayris@gustaveroussy.fr"
__license__ = "MIT"

import os
from snakemake.shell import shell

# Snakemake wrapper body: builds and runs a single `xsv <subcommand>` command.
#
# Reads from the injected `snakemake` object:
#   input["table"]       one table path, or a list of table paths
#   output               result file, or directory / chunk files for `split`
#   params["subcommand"] xsv subcommand, passed to xsv verbatim (required)
#   params["extra"]      optional extra command-line arguments
#   params.col1 / col2   join columns, read only when subcommand is "join"
#   threads              forwarded as --jobs for frequency / split / stats
#
# Raises:
#   Warning: if more than one thread is requested for a subcommand that is
#            not given --jobs here (this aborts the job).

# By default only stderr goes to the log: stdout carries the result table.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)
subcommand = snakemake.params["subcommand"]
extra = snakemake.params.get("extra", "")

# A named input declared as a single path is a plain string, whereas several
# paths form a list. Normalise to a list so the extension check below never
# measures a string's length or iterates over its characters.
tables = snakemake.input["table"]
if isinstance(tables, str):
    tables = [tables]

# TSV delimiter: only set when every input table is a TSV file.
# The Python literal holds a real tab inside bash's $'...' quoting.
if tables and all(str(table).endswith(".tsv") for table in tables):
    extra += " --delimiter $'\t' "


# Automatic multithreading when possible
if subcommand in ("frequency", "split", "stats"):
    extra += f" --jobs {snakemake.threads} "
elif snakemake.threads > 1:
    # Deliberately raised (not merely reported): reserving several threads
    # for a command that receives no --jobs option stops the job.
    raise Warning("Only one thread is required")

# Command line building
if subcommand == "join":
    # join takes exactly two tables, each preceded by its join column.
    shell(
        "xsv {subcommand} {extra} "
        "{snakemake.params.col1} {snakemake.input.table[0]} "
        "{snakemake.params.col2} {snakemake.input.table[1]} "
        "> {snakemake.output} {log}"
    )
elif subcommand == "index":
    # No result is expected on stdout here, so stdout is logged as well.
    # The output path is not passed to xsv for this subcommand.
    log = snakemake.log_fmt_shell(stdout=True, stderr=True)
    shell("xsv {subcommand} {extra} {snakemake.input.table} {log}")
elif subcommand == "split":
    log = snakemake.log_fmt_shell(stdout=True, stderr=True)
    # xsv split expects an output directory. A single output is handed over
    # as is; for several chunk files, their parent directory is used.
    outdir = snakemake.output
    if len(outdir) > 1:
        # dirname() is "" for files in the working directory; an empty
        # argument would vanish from the command line, so fall back to ".".
        outdir = os.path.dirname(outdir[0]) or "."
    shell("xsv {subcommand} {extra} {outdir} {snakemake.input.table} {log}")
else:
    # Generic case: all tables as positional arguments, result on stdout.
    shell(
        "xsv {subcommand} {extra} {snakemake.input.table} "
        " > {snakemake.output} {log}"
    )