AGAT
Another GTF/GFF Analysis Toolkit
URL: https://agat.readthedocs.io/en/stable/index.html
Example
This wrapper can be used in the following way:
# Calls the AGAT wrapper with the `config` command; no input is declared.
rule test_agat_config:
    output:
        "out/agat_config.yaml",
    threads: 1
    log:
        "logs/test_agat_config.log",
    params:
        command="config",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with the `levels` command; no input is declared.
rule test_agat_levels:
    output:
        "out/agat_levels.yaml",
    log:
        "logs/test_agat_levels.log",
    params:
        command="levels",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_bed2gff.pl`.
rule test_agat_convert_bed2gff:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        bed="input/test.bed",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_convert_bed2gff.gff",
    log:
        "logs/test_agat_convert_bed2gff.log",
    params:
        command="agat_convert_bed2gff.pl",
        # Everything that is neither an input nor an output goes here.
        extra="",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_embl2gff.pl`.
rule test_agat_convert_embl2gff:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        embl="input/agat_convert_embl2gff_1.embl",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_convert_embl2gff.gff",
    log:
        "logs/test_agat_convert_embl2gff.log",
    params:
        command="agat_convert_embl2gff.pl",
        # Everything that is neither an input nor an output goes here.
        extra="",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_genscan2gff.pl`.
rule test_agat_convert_genscan2gff:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        genscan="input/test.genscan",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_convert_genscan2gff.gff",
    log:
        "logs/test_agat_convert_genscan2gff.log",
    params:
        command="agat_convert_genscan2gff.pl",
        # Everything that is neither an input nor an output goes here.
        extra="",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_mfannot2gff.pl`.
rule test_agat_convert_mfannot2gff:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        mfannot="input/test.mfannot",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_convert_mfannot2gff.gff",
    log:
        "logs/agat_convert_mfannot2gff.log",
    params:
        # This subcommand takes no optional non-file parameters.
        command="agat_convert_mfannot2gff.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_minimap2_bam2gff.pl` on a BAM input.
rule test_agat_convert_minimap2_bam2gff_bam:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        i="input/test_minimap2.bam",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_convert_minimap2_bam2gff_bam.gff",
    log:
        "logs/agat_convert_minimap2_bam2gff_bam.log",
    params:
        # Every non-file parameter is detected automatically.
        command="agat_convert_minimap2_bam2gff.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_minimap2_bam2gff.pl` on a SAM input.
rule test_agat_convert_minimap2_bam2gff_sam:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        i="input/test_minimap2.sam",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_convert_minimap2_bam2gff_sam.gff",
    log:
        "logs/agat_convert_minimap2_bam2gff_sam.log",
    params:
        # Every non-file parameter is detected automatically.
        command="agat_convert_minimap2_bam2gff.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_sp_gff2bed.pl`.
rule test_agat_convert_sp_gff2bed:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_convert_sp_gff2bed.bed",
    log:
        "logs/agat_convert_sp_gff2bed.log",
    params:
        command="agat_convert_sp_gff2bed.pl",
        # Arguments that are not files (`--nc` or `--sub`).
        extra="",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_sp_gff2gtf.pl`.
rule test_agat_convert_sp_gff2gtf:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_convert_sp_gff2gtf.gtf",
    log:
        "logs/agat_convert_sp_gff2gtf.log",
    params:
        command="agat_convert_sp_gff2gtf.pl",
        # Arguments that are not files (`--gtf_version`).
        extra="",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_sp_gff2tsv.pl`.
rule test_agat_convert_sp_gff2tsv:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_convert_sp_gff2tsv.tsv",
    log:
        "logs/agat_convert_sp_gff2tsv.log",
    params:
        # This command has no non-file parameters.
        command="agat_convert_sp_gff2tsv.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_sp_gff2zff.pl`.
rule test_agat_convert_sp_gff2zff:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Keyword names double as argument names on the final command line.
        ann="out/test_agat_convert_sp_gff2zff.ann",
        dna="out/test_agat_convert_sp_gff2zff.dna",
    log:
        "logs/agat_convert_sp_gff2zff.log",
    params:
        # This command has no non-file parameters.
        command="agat_convert_sp_gff2zff.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_convert_sp_gxf2gxf.pl`.
rule test_agat_convert_sp_gxf2gxf:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_convert_sp_gxf2gxf.gff",
    log:
        # Log file is named after the command (`gxf2gxf`).
        "logs/agat_convert_sp_gxf2gxf.log",
    params:
        command="agat_convert_sp_gxf2gxf.pl",
        # Arguments that are not files (`--verbose`).
        extra="",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_prokka_infer_name_from_attributes.pl`.
rule test_agat_sp_prokka_infer_name_from_attributes:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_prokka_infer_name_from_attributes.gff",
    log:
        "logs/agat_sp_prokka_infer_name_from_attributes.log",
    params:
        command="agat_sp_prokka_infer_name_from_attributes.pl",
        # Arguments that are not files (`--force`).
        extra="",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_add_intergenic_regions.pl`.
rule test_agat_sp_add_intergenic_regions:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_add_intergenic_regions.gff",
    log:
        "logs/test_agat_sp_add_intergenic_regions.log",
    params:
        command="agat_sp_add_intergenic_regions.pl",
        # Arguments that are not files (`--verbose`).
        extra="",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_add_introns.pl`.
rule test_agat_sp_add_introns:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_add_introns.gff",
    log:
        "logs/test_agat_sp_add_introns.log",
    params:
        # This command takes no non-file arguments.
        command="agat_sp_add_introns.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_add_splice_sites.pl`.
rule test_agat_sp_add_splice_sites:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_add_splice_sites.gff",
    log:
        "logs/test_agat_sp_add_splice_sites.log",
    params:
        # This command takes no non-file arguments.
        command="agat_sp_add_splice_sites.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_add_start_and_stop.pl`.
rule test_agat_sp_add_start_and_stop:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_add_start_and_stop.gff",
    log:
        "logs/test_agat_sp_add_start_and_stop.log",
    params:
        # Arguments that are not files
        # (e.g. `--codon`, `--extend`, `--verbose`, or `--na`).
        extra="",
        command="agat_sp_add_start_and_stop.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_alignment_output_style.pl`.
rule test_agat_sp_alignment_output_style:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_alignment_output_style.gff",
    log:
        "logs/test_agat_sp_alignment_output_style.log",
    params:
        # Arguments that are not files (e.g. `--ct`, or `--verbose`).
        extra="",
        command="agat_sp_alignment_output_style.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with
# `agat_sp_clipN_seqExtremities_and_fixCoordinates.pl`.
rule test_agat_sp_clipN_seqExtremities_and_fixCoordinates:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Keyword names double as argument names on the final command line.
        og="out/test_agat_sp_clipN_seqExtremities_and_fixCoordinates.gff",
        of="out/test_agat_sp_clipN_seqExtremities_and_fixCoordinates.fasta",
    log:
        "logs/test_agat_sp_clipN_seqExtremities_and_fixCoordinates.log",
    params:
        # Arguments that are not files (e.g. `--of`, or `--og`).
        # NOTE(review): `og` and `of` are already the output keys of this
        # rule, so these examples look misplaced -- confirm which flags
        # actually belong in `extra`.
        extra="",
        command="agat_sp_clipN_seqExtremities_and_fixCoordinates.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_compare_two_annotations.pl`.
rule test_agat_sp_compare_two_annotations:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff1="input/annotation.gff",
        gff2="input/prokka_fragmented_genes.gff",
    output:
        # Keyword names double as argument names on the final command line.
        # The subcommand writes one file per case, whose names are built
        # from the comparison result, plus a report.
        out=directory("out/test_agat_sp_compare_two_annotations"),
    log:
        "logs/test_agat_sp_compare_two_annotations.log",
    params:
        # Arguments that are not files (`--verbose`).
        extra="",
        command="agat_sp_compare_two_annotations.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_complement_annotations.pl`.
rule test_agat_sp_complement_annotations:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        ref="input/annotation.gff",
        add="input/prokka_fragmented_genes.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_complement_annotations.gff",
    log:
        "logs/test_agat_sp_complement_annotations.log",
    params:
        # Arguments that are not files (`--size_min`).
        extra="",
        command="agat_sp_complement_annotations.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_ensembl_output_style.pl`.
rule test_agat_sp_ensembl_output_style:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_ensembl_output_style.gff",
    log:
        "logs/test_agat_sp_ensembl_output_style.log",
    params:
        # Arguments that are not files (`--ct`, or `--verbose`).
        extra="",
        command="agat_sp_ensembl_output_style.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_extract_attributes.pl`.
rule test_agat_sp_extract_attributes:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        Parent="out/test_agat_sp_extract_attributes_Parent.txt",
        ID="out/test_agat_sp_extract_attributes_ID.txt",
    log:
        "logs/test_agat_sp_extract_attributes.log",
    params:
        # Arguments that are not files, `--att` excluded
        # (e.g. `-p`, `--merge`, or `-d`).
        extra="-d",
        command="agat_sp_extract_attributes.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_extract_sequences.pl`.
rule test_agat_sp_extract_sequences:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_extract_sequences.fasta",
    log:
        # Dedicated log file, so that it does not collide with the log of
        # `test_agat_sp_extract_attributes`.
        "logs/test_agat_sp_extract_sequences.log",
    params:
        # Arguments that are not files (e.g. `--asc`, `--cdna`, `--cfs`, ...).
        extra="-p -t cds",
        command="agat_sp_extract_sequences.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_by_ORF_size.pl`.
rule test_agat_sp_filter_by_ORF_size:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # ORF sizes that satisfy the `--test` and `--size` criteria.
        matched="out/test_agat_sp_filter_by_ORF_size_matched.gff",
        # ORF sizes that do *NOT* satisfy the `--test` and `--size` criteria.
        unmatched="out/test_agat_sp_filter_by_ORF_size_unmatched.gff",
    log:
        "logs/test_agat_sp_filter_by_ORF_size.log",
    params:
        # Arguments that are not files (e.g. `--size`, `--test`, or `--verbose`).
        extra="",
        command="agat_sp_filter_by_ORF_size.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_by_locus_distance.pl`.
rule test_agat_sp_filter_by_locus_distance:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_sp_filter_by_locus_distance.gff",
    log:
        "logs/test_agat_sp_filter_by_locus_distance.log",
    params:
        # Arguments that are not files (e.g. `-d`, or `--add_flag`).
        extra="",
        command="agat_sp_filter_by_locus_distance.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_feature_by_attribute_presence.pl`.
rule test_agat_sp_filter_feature_by_attribute_presence:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_filter_feature_by_attribute_presence.gff",
    log:
        "logs/test_agat_sp_filter_feature_by_attribute_presence.log",
    params:
        # Arguments that are not files (`--type`, `--flip` or `--att`).
        extra="--type gene --att Parent",
        command="agat_sp_filter_feature_by_attribute_presence.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_feature_by_attribute_value.pl`.
rule test_agat_sp_filter_feature_by_attribute_value:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_filter_feature_by_attribute_value.gff",
    log:
        "logs/test_agat_sp_filter_feature_by_attribute_value.log",
    params:
        # Arguments that are not files
        # (e.g. `--type`, `--value`, `--value_insensitive`...).
        extra="--type exon --attribute constitutive --value 1",
        command="agat_sp_filter_feature_by_attribute_value.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_feature_from_keep_list.pl`.
rule test_agat_sp_filter_feature_from_keep_list:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        keep_list="input/keep_list.txt",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_filter_feature_from_keep_list.gff",
    log:
        "logs/test_agat_sp_filter_feature_from_keep_list.log",
    params:
        # Arguments that are not files (`--type`, or `--verbose`).
        extra="--verbose 4",
        command="agat_sp_filter_feature_from_keep_list.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_feature_from_kill_list.pl`.
rule test_agat_sp_filter_feature_from_kill_list:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        kill_list="input/kill_list.txt",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_filter_feature_from_kill_list.gff",
    log:
        "logs/test_agat_sp_filter_feature_from_kill_list.log",
    params:
        # Arguments that are not files (`--type`, or `--verbose`).
        extra="--verbose 4",
        command="agat_sp_filter_feature_from_kill_list.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_gene_by_intron_numbers.pl`.
rule test_agat_sp_filter_gene_by_intron_numbers:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_filter_gene_by_intron_numbers.gff",
    log:
        "logs/test_agat_sp_filter_gene_by_intron_numbers.log",
    params:
        # Arguments that are not files (`--number`, `--verbose`, or `--test`).
        extra="--test '>=' --nb 3",
        command="agat_sp_filter_gene_by_intron_numbers.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_gene_by_length.pl`.
rule test_agat_sp_filter_gene_by_length:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_filter_gene_by_length.gff",
    log:
        "logs/test_agat_sp_filter_gene_by_length.log",
    params:
        # Arguments that are not files (`--size`, `--verbose`, or `--test`).
        extra="--test '>=' --size 3",
        command="agat_sp_filter_gene_by_length.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_incomplete_gene_coding_models.pl`.
rule test_agat_sp_filter_incomplete_gene_coding_models:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_sp_filter_incomplete_gene_coding_models.gff",
    log:
        "logs/test_agat_sp_filter_incomplete_gene_coding_models.log",
    params:
        # Arguments that are not files
        # (e.g. `--codon`, `--add_flag`, `--skip_start_check`, ...).
        extra="--verbose 4",
        command="agat_sp_filter_incomplete_gene_coding_models.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_filter_record_by_coordinates.pl`.
rule test_agat_sp_filter_record_by_coordinates:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        coordinates="input/coordinates.tsv",
    output:
        # Keyword names double as argument names on the final command line.
        # The directory holds one file per interval (when a record lies
        # within it), a GFF with the records that belong to none of the
        # intervals given in coordinates, and a text report.
        o=directory("out/test_agat_sp_filter_record_by_coordinates"),
    log:
        "logs/test_agat_sp_filter_record_by_coordinates.log",
    params:
        # Arguments that are not files (`--exclude` or `--verbose`).
        extra="--verbose 4",
        command="agat_sp_filter_record_by_coordinates.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_fix_cds_phases.pl`.
rule test_agat_sp_fix_cds_phases:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_fix_cds_phases.gff",
    log:
        "logs/test_agat_sp_fix_cds_phases.log",
    params:
        # Arguments that are not files (`--verbose`).
        extra="",
        command="agat_sp_fix_cds_phases.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_fix_features_locations_duplicated.pl`.
rule test_agat_sp_fix_features_locations_duplicated:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_sp_fix_features_locations_duplicated.gff",
    log:
        "logs/test_agat_sp_fix_features_locations_duplicated.log",
    params:
        # Arguments that are not files (`--verbose`, or `--model`).
        extra="",
        command="agat_sp_fix_features_locations_duplicated.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_fix_fusion.pl`.
rule test_agat_sp_fix_fusion:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Optional: every feature of the GFF.
        all="out/test_agat_sp_fix_fusion_all.gff",
        # Optional: only the features agat modified.
        only_modified="out/test_agat_sp_fix_fusion_modified.gff",
        # Optional: only the features agat did *NOT* modify.
        intact="out/test_agat_sp_fix_fusion_intact.gff",
        # Optional: report on the number of modified/intact features.
        report="out/test_agat_sp_fix_fusion_report.txt",
    log:
        "logs/test_agat_sp_fix_fusion.log",
    params:
        # Arguments that are not files
        # (`--codon`, `--threshold`, `--stranded`, or `--verbose`).
        extra="",
        command="agat_sp_fix_fusion.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_fix_longest_ORF.pl`.
rule test_agat_sp_fix_longest_ORF:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Optional: every feature of the GFF.
        all="out/test_agat_sp_fix_longest_ORF_all.gff",
        # Optional: only the features agat modified.
        only_modified="out/test_agat_sp_fix_longest_ORF_modified.gff",
        # Optional: only the features agat did *NOT* modify.
        intact="out/test_agat_sp_fix_longest_ORF_intact.gff",
        # Optional: report on the number of modified/intact features.
        report="out/test_agat_sp_fix_longest_ORF_report.txt",
    log:
        "logs/test_agat_sp_fix_longest_ORF.log",
    params:
        # Arguments that are not files
        # (`--codon`, `--split`, `--stranded`, or `--verbose`).
        extra="",
        command="agat_sp_fix_longest_ORF.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_fix_overlaping_genes.pl`.
rule test_agat_sp_fix_overlaping_genes:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_sp_fix_overlaping_genes.gff",
    log:
        "logs/test_agat_sp_fix_overlaping_genes.log",
    params:
        # Arguments that are not files (`--merge` or `--verbose`).
        extra="",
        command="agat_sp_fix_overlaping_genes.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_fix_small_exon_from_extremities.pl`.
rule test_agat_sp_fix_small_exon_from_extremities:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Keyword names double as argument names on the final command line.
        output="out/test_agat_sp_fix_small_exon_from_extremities.gff",
    log:
        "logs/test_agat_sp_fix_small_exon_from_extremities.log",
    params:
        # Arguments that are not files
        # (`--merge`, `--size`, `--codon`, or `--verbose`).
        extra="",
        command="agat_sp_fix_small_exon_from_extremities.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_flag_premature_stop_codons.pl`.
rule test_agat_sp_flag_premature_stop_codons:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
        fasta="input/sequence.fa",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_flag_premature_stop_codons.gff",
    log:
        "logs/test_agat_sp_flag_premature_stop_codons.log",
    params:
        # Argument that is not a file: `--codon`.
        extra="",
        command="agat_sp_flag_premature_stop_codons.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_flag_short_introns.pl`.
rule test_agat_sp_flag_short_introns:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_flag_short_introns.gff",
    log:
        "logs/test_agat_sp_flag_short_introns.log",
    params:
        # Parameters that are not files (`--intron_size` or `--verbose`).
        extra="",
        command="agat_sp_flag_short_introns.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_functional_statistics.pl`.
rule test_agat_sp_functional_statistics:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        # The directory holds one sub-directory per feature type available
        # in the GFF/GTF file, as well as a report.
        o=directory("out/test_agat_sp_functional_statistics"),
    log:
        "logs/test_agat_sp_functional_statistics.log",
    params:
        # Parameters that are not files (`--gs`).
        extra="",
        command="agat_sp_functional_statistics.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_keep_longest_isoform.pl`.
rule test_agat_sp_keep_longest_isoform:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Keyword names double as argument names on the final command line.
        o="out/test_agat_sp_keep_longest_isoform.gff",
    log:
        "logs/test_agat_sp_keep_longest_isoform.log",
    params:
        # Non-file parameters are not allowed for this command.
        command="agat_sp_keep_longest_isoform.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_kraken_assess_liftover.pl`.
rule test_agat_sp_kraken_assess_liftover:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gtf="input/test_kraken.gtf",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_kraken_assess_liftover.gff",
    log:
        "logs/test_agat_sp_kraken_assess_liftover.log",
    params:
        # Parameters that are not files (`--threshold` or `--verbose`).
        extra="",
        command="agat_sp_kraken_assess_liftover.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_list_short_introns.pl`.
rule test_agat_sp_list_short_introns:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/agat_sq_add_attributes_from_tsv.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_list_short_introns.gff",
    log:
        "logs/test_agat_sp_list_short_introns.log",
    params:
        # Parameter that is not a file: `--size`.
        extra="",
        command="agat_sp_list_short_introns.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_manage_IDs.pl`.
rule test_agat_sp_manage_IDs:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/agat_sq_add_attributes_from_tsv.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_manage_IDs.gff",
    log:
        "logs/test_agat_sp_manage_IDs.log",
    params:
        # Parameters that are not files (e.g. `--gap`, `--ensembl`, `--prefix`, ...).
        extra="",
        command="agat_sp_manage_IDs.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_manage_UTRs.pl`.
rule test_agat_sp_manage_UTRs:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Optional PDF histogram: 5'UTR with at least <n> exons.
        five_prime_utr_overORequal="out/test_agat_sp_manage_UTRs_five_prime_utr_overORequal.pdf",
        # Optional PDF histogram: 5'UTR with less than <n> exons.
        five_prime_utr_under="out/test_agat_sp_manage_UTRs_five_prime_utr_under.pdf",
        # Optional PDF histogram: 3'UTR with at least <n> exons.
        three_prime_utr_overORequal="out/test_agat_sp_manage_UTRs_three_prime_utr_overORequal.pdf",
        # Optional PDF histogram: 3'UTR with less than <n> exons.
        three_prime_utr_under="out/test_agat_sp_manage_UTRs_three_prime_utr_under.pdf",
        # Optional PDF histogram: both 3/5'UTR with at least <n> exons.
        # both_utr_overORequal="out/test_agat_sp_manage_UTRs_both_utr_overORequal.pdf",
        # Optional PDF histogram: both 3/5'UTR with less than <n> exons.
        # both_utr_under="out/test_agat_sp_manage_UTRs_both_utr_under.pdf",
        # Optional analysis report.
        report="out/test_agat_sp_manage_UTRs_report.txt",
    log:
        "logs/test_agat_sp_manage_UTRs.log",
    params:
        # Parameters that are not files
        # (e.g. `--number`, `--three`, `--five`, or `--both`).
        # The `--plot` parameter is inferred automatically.
        extra="--three --five -n 6",
        command="agat_sp_manage_UTRs.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_manage_attributes.pl`.
rule test_agat_sp_manage_attributes:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/agat_sq_add_attributes_from_tsv.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_manage_attributes.gff",
    log:
        "logs/test_agat_sp_manage_attributes.log",
    params:
        # Parameters that are not files
        # (`--type`, `--tag`, `--add`, `--cp`, or `--overwrite`).
        extra="--att biotype",
        command="agat_sp_manage_attributes.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_manage_functional_annotation.pl`.
rule test_agat_sp_manage_functional_annotation:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/agat_sp_manage_functional_annotation/02413F.gff",
        # Optional blast, interpro, and reference fasta files.
        blast="input/agat_sp_manage_functional_annotation/02413F_blast.out",
        db="input/agat_sp_manage_functional_annotation/uniprot_sprot_test.fasta",
        interpro="input/agat_sp_manage_functional_annotation/02413F_interpro.tsv",
    output:
        # Keyword names double as argument names on the final command line.
        gff="out/test_agat_sp_manage_functional_annotation.gff",
        # Global text report.
        report="out/test_agat_sp_manage_functional_annotation.report.txt",
        # Warnings and non-blocking format errors.
        error="out/test_agat_sp_manage_functional_annotation.error.txt",
        # Duplicated blast sequences; requires blast and db.
        duplicates="out/test_agat_sp_manage_functional_annotation.duplicates.tsv",
        # Conserved protein domains database terms; requires all optional input.
        cdd="out/test_agat_sp_manage_functional_annotation.cdd.tsv",
        # Gene ontology database terms; requires all optional input.
        go="out/test_agat_sp_manage_functional_annotation.go.tsv",
        # Interpro domain database terms; requires all optional input.
        interpro="out/test_agat_sp_manage_functional_annotation.interpro.tsv",
        # MobiDB Lite database terms; requires all optional input.
        mobidb="out/test_agat_sp_manage_functional_annotation.mobidblite.tsv",
        # Panther database terms; requires all optional input.
        panther="out/test_agat_sp_manage_functional_annotation.panther.tsv",
        # Superfamily database terms; requires all optional input.
        superfamily="out/test_agat_sp_manage_functional_annotation.superfamily.tsv",
    log:
        "logs/test_agat_sp_manage_functional_annotation.log",
    params:
        # Parameters that are not files
        # (e.g. `--blast_evalue`, `--pe`, `--clean_ontology`, ...).
        # The `--plot` parameter is inferred automatically.
        extra="",
        command="agat_sp_manage_functional_annotation.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_manage_introns.pl`.
rule test_agat_sp_manage_introns:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/annotation.gff",
    output:
        # Optional intron report.
        report="out/test_agat_sp_manage_introns_report.txt",
        # Optional cds histogram (triggers the `--plot` argument).
        cds_pdf="out/test_agat_sp_manage_introns_cds.pdf",
        # Optional exon histogram (triggers the `--plot` argument).
        exon_pdf="out/test_agat_sp_manage_introns_exons.pdf",
        # Optional 3'UTR histogram (triggers the `--plot` argument).
        # three_prime_utr_pdf="out/test_agat_sp_manage_introns_exons_3p.pdf",
        # Optional 5'UTR histogram (triggers the `--plot` argument).
        five_prime_utr_pdf="out/test_agat_sp_manage_introns_exons_5p.pdf",
    log:
        "logs/test_agat_sp_manage_introns.log",
    params:
        # Parameters that are not files (`--p`, `--window`).
        # The `--plot` parameter is inferred automatically.
        extra="",
        command="agat_sp_manage_introns.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_merge_annotations.pl`.
rule test_agat_sp_merge_annotations:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff=["input/prokka_fragmented_genes.gff", "input/annotation.gff"],
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_merge_annotations.gff",
    log:
        "logs/test_agat_sp_merge_annotations.log",
    params:
        # This command has no non-file parameters.
        command="agat_sp_merge_annotations.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Calls the AGAT wrapper with `agat_sp_move_attributes_within_records.pl`.
rule test_agat_sp_move_attributes_within_records:
    input:
        # Keyword names double as argument names on the final command line.
        # config="",  # optional configuration file
        gff="input/agat_sq_add_attributes_from_tsv.gff",
    output:
        # Keyword names double as argument names on the final command line.
        out="out/test_agat_sp_move_attributes_within_records.gff",
    log:
        "logs/test_agat_sp_move_attributes_within_records.log",
    params:
        # Parameters that are not files
        # (`--fp`, `--fc`, `--attribute`, or `--verbose`).
        extra="--feature_copy mRNA --feature_paste CDS",
        command="agat_sp_move_attributes_within_records.pl",
    wrapper:
        "v9.6.0/bio/agat"
# Example: `agat_sp_prokka_fix_fragmented_gene_annotations.pl` writing to a directory.
rule test_agat_sp_prokka_fix_fragmented_gene_annotations:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/prokka_fragmented_genes.gff",
fasta="input/prokka_cav_10DC88.fa",
db="input/prokka_bacteria_sprot.fa",
output:
# The key here will be used as argument name in the final command line.
# The output directory contains a `report.txt` and additional files
# depending on the content of input file(s).
out=directory("out/test_agat_sp_prokka_fix_fragmented_gene_annotations"),
log:
"logs/test_agat_sp_prokka_fix_fragmented_gene_annotations.log",
params:
# Non-file parameters:
# `--frags`, `--pseudo`, `--hamap_size`, `--ct`, `--skip_hamap`, or `--verbose`
extra="--skip_hamap", # To speed up tests
command="agat_sp_prokka_fix_fragmented_gene_annotations.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sp_sensitivity_specificity.pl` comparing two GFF files.
rule test_agat_sp_sensitivity_specificity:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff1="input/prokka_fragmented_genes.gff",
gff2="input/annotation.gff",
output:
# The key here will be used as argument name in the final command line.
output="out/test_agat_sp_sensitivity_specificity.txt",
log:
"logs/test_agat_sp_sensitivity_specificity.log",
params:
# Non-file parameters (`--verbose`)
extra="",
command="agat_sp_sensitivity_specificity.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sp_separate_by_record_type.pl` writing to a directory.
rule test_agat_sp_separate_by_record_type:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/agat_sq_add_attributes_from_tsv.gff",
output:
# The key here will be used as argument name in the final command line.
# One-letter keys are passed with a single dash (`-o`).
# The output directory contains up to one file per record type among the
# huge list of records available in `agat levels`.
o=directory("out/test_agat_sp_separate_by_record_type"),
log:
"logs/test_agat_sp_separate_by_record_type.log",
params:
# No non-file parameters
command="agat_sp_separate_by_record_type.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sp_statistics.pl` with text report, YAML report and plots.
rule test_agat_sp_statistics:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/agat_sq_add_attributes_from_tsv.gff",
output:
# Optional path to text report
report="out/test_agat_sp_statistics.txt",
# Optional path to yaml report (triggers the `--yaml` argument)
yaml="out/test_agat_sp_statistics.yaml",
# Optional path to directory containing multiple plots, corresponding
# to each level present in the input GFF file and described by the
# `agat levels` command. Triggers `-p` argument.
plot=directory("out/test_agat_sp_statistics_plots"),
log:
"logs/test_agat_sp_statistics.log",
params:
# Non-file parameters: `--gs`, or `--verbose`
# All other non-file parameters are automatically inferred
extra="--gs 432709230",
command="agat_sp_statistics.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_add_attributes_from_tsv.pl` with a GFF and a TSV table.
rule test_agat_sq_add_attributes_from_tsv:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/agat_sq_add_attributes_from_tsv.gff",
# The wrapper reads `input.tsv`; when this path ends with `csv`, the
# `--csv` flag is added automatically.
tsv="input/agat_sq_add_attributes_from_tsv.tsv",
output:
# Path to the resulting GFF file
o="out/test_agat_sq_add_attributes_from_tsv.gff",
log:
"logs/test_agat_sq_add_attributes_from_tsv.log",
params:
# Non-file parameters: `--verbose`
# All other parameters are automatically included
extra="",
command="agat_sq_add_attributes_from_tsv.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_add_hash_tag.pl` on a single GFF file.
rule test_agat_sq_add_hash_tag:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/agat_sq_add_attributes_from_tsv.gff",
output:
# Path to the resulting GFF file
o="out/test_agat_sq_add_hash_tag.gff",
log:
"logs/test_agat_sq_add_hash_tag.log",
params:
# Non-file parameters, appended to the command line as written
extra="--interval 1",
command="agat_sq_add_hash_tag.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_add_locus_tag.pl` on a single GFF file.
rule test_agat_sq_add_locus_tag:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/agat_sq_add_attributes_from_tsv.gff",
output:
# Path to the resulting GFF file
o="out/test_agat_sq_add_locus_tag.gff",
log:
"logs/test_agat_sq_add_locus_tag.log",
params:
# Non-file parameters, e.g. `--type`, `--lo`, `--li`, `--of` or `--quiet`
extra="",
command="agat_sq_add_locus_tag.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_filter_feature_from_fasta.pl` with a GFF and a FASTA file.
rule test_agat_sq_filter_feature_from_fasta:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/annotation.gff",
fasta="input/sequence.fa",
output:
# Path to the resulting GFF file
o="out/test_agat_sq_filter_feature_from_fasta.gff",
log:
"logs/test_agat_sq_filter_feature_from_fasta.log",
params:
# Only verbosity parameter is expected, other ones are automatically filled
extra="--verbose 4",
command="agat_sq_filter_feature_from_fasta.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_list_attributes.pl` writing a text listing.
rule test_agat_sq_list_attributes:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/annotation.gff",
output:
# Optional path to attributes
output="out/test_agat_sq_list_attributes.txt",
log:
"logs/test_agat_sq_list_attributes.log",
params:
# Only `-p`, `-t` or `-l`, other options are automatically inferred
extra="-p level2,cds,exon",
command="agat_sq_list_attributes.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_manage_IDs.pl` on a single GFF file.
rule test_agat_sq_manage_IDs:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/annotation.gff",
output:
# Optional path to ids
o="out/test_agat_sq_manage_IDs.txt",
log:
"logs/test_agat_sq_manage_IDs.log",
params:
# Only `--verbose`, all other parameters are inferred
extra="--verbose 4",
command="agat_sq_manage_IDs.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_manage_attributes.pl` with tag and type selection.
rule test_agat_sq_manage_attributes:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/annotation.gff",
output:
# Optional path to GFF
output="out/test_agat_sq_manage_attributes.gff",
log:
"logs/test_agat_sq_manage_attributes.log",
params:
# One or more of `--type`, `--tag`, `--add`, `--vp`,
# `--overwrite`, `--value` or `--strategy`
extra="--tag locus_tag,product,name/NewName --type level2,cds,exon",
command="agat_sq_manage_attributes.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_mask.pl` with a GFF and a FASTA file.
rule test_agat_sq_mask:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/prokka_fragmented_genes.gff",
fasta="input/prokka_cav_10DC88.fa",
output:
# Path to masked GFF/GTF
o="out/test_agat_sq_mask.gff",
log:
"logs/test_agat_sq_mask.log",
params:
# Only `--sm` or `--hm` (mutually exclusive), all other parameters are inferred.
extra="--hm",
command="agat_sq_mask.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_remove_redundant_entries.pl` on a single GFF file.
rule test_agat_sq_remove_redundant_entries:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/prokka_fragmented_genes.gff",
output:
# Path to GFF entries
o="out/test_agat_sq_remove_redundant_entries.gff",
log:
"logs/test_agat_sq_remove_redundant_entries.log",
params:
# All parameters are filled automatically from the input/output keys
command="agat_sq_remove_redundant_entries.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_repeats_analyzer.pl` with a GFF and a genome FASTA file.
rule test_agat_sq_repeats_analyzer:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/prokka_fragmented_genes.gff",
# Optional path to fasta sequence, see params.extra
genome="input/prokka_cav_10DC88.fa",
output:
# Path to repeats
o="out/test_agat_sq_repeats_analyzer.gff",
log:
"logs/test_agat_sq_repeats_analyzer.log",
params:
# If no fasta file is provided in `input.genome`, then provide the genome size
# through the optional parameter below:
# extra="--genome 125",
command="agat_sq_repeats_analyzer.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_reverse_complement.pl` with a GFF and a FASTA file.
rule test_agat_sq_reverse_complement:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/prokka_fragmented_genes.gff",
fasta="input/prokka_cav_10DC88.fa",
output:
# Path to the reverse-complemented GFF
o="out/test_agat_sq_reverse_complement.gff",
log:
"logs/test_agat_sq_reverse_complement.log",
params:
# Only verbosity parameter is allowed, other ones are automatically filled
extra="--verbose 4",
command="agat_sq_reverse_complement.pl",
wrapper:
"v9.6.0/bio/agat"
# Example: `agat_sq_rfam_analyzer.pl` with a GFF and a genome FASTA file.
rule test_agat_sq_rfam_analyzer:
input:
# The key here will be used as argument name in the final command line
# config="", # Optional path to configuration file
gff="input/prokka_fragmented_genes.gff",
genome="input/prokka_cav_10DC88.fa",
output:
# Path to the resulting TSV file
o="out/test_agat_sq_rfam_analyzer.tsv",
log:
"logs/test_agat_sq_rfam_analyzer.log",
params:
# If no fasta file is provided in `input.genome`, then provide the genome size
# through the optional parameter below:
# extra="--genome 125",
command="agat_sq_rfam_analyzer.pl",
wrapper:
"v9.6.0/bio/agat"
Note that input, output and log file paths can be chosen freely.
When running with
snakemake --use-conda
the software dependencies will be automatically deployed into an isolated environment before execution.
Notes
Remember that input and output are reserved key-words in Snakemake. Use alternate command line argument names to avoid Workflow errors: i instead of input, and o instead of output.
Software dependencies
agat=1.7.0
Input/Output
Input:
Named input files. Names (keys) will be used as argument names in the final command line, and the paths as their values.
Output:
Named output files. Names (keys) will be used as argument names in the final command line, and the paths as their values.
Params
command: AGAT command or script name to run (e.g. `config` or `agat_convert_bed2gff.pl`).
extra: Any non-file parameter.
Code
#! coding: utf-8
__author__ = "Thibault Dayris"
__copyright__ = "Copyright 2025, Thibault Dayris"
__license__ = "MIT"
from snakemake.shell import shell
from tempfile import TemporaryDirectory
import os.path # Get output files prefix
import shlex # Ensure quotes escape
import warnings # Warn user on un-supported subcommands
# Agat IO options do not follow the same patterns from one tool to another.
# We let user provide the correct argument name, just like in NextFlow and Seqkit
# wrappers.
def parse_args(io):
    """
    Turn a mapping of argument name -> path(s) into a command line fragment.

    One-letter names are emitted with a single dash (`-o`), longer names
    with a double dash (`--gff`). A `str` value yields one argument; a
    `list` value repeats the argument once per element (needed by commands
    such as `agat_sp_merge_annotations.pl`). Values of any other type are
    ignored.

    Every path is quoted with `shlex.quote`, since Agat produces names
    with {, }, <, >, or -, which a shell would otherwise interpret.
    """
    fragments = []
    for key, value in io.items():
        flag = f" -{key} " if len(key) <= 1 else f" --{key} "
        if isinstance(value, str):
            # Snakemake's `:q` quoting is not available on plain strings,
            # hence the explicit shell quoting.
            fragments.append(f" {flag} {shlex.quote(str(value))}")
        elif isinstance(value, list):
            fragments.extend(
                f" {flag} {shlex.quote(str(item))} " for item in value
            )
    return "".join(fragments)
def join_and_run_commands(command_lines):
    """
    Chain several command lines and execute them as one shell call.

    The commands are joined with `&&`, so each one only runs when the
    previous one returned zero (whatever the shell's error settings are).
    The whole chain is wrapped in a subshell so that the stdout/stderr of
    every command goes through the Snakemake log redirection.
    """
    redirection = snakemake.log_fmt_shell(stdout=True, stderr=True)
    chained = " && ".join(command_lines)
    shell(f"({chained}) {redirection}")
def shell_rename(source, destination):
    """
    Build the `mv` command line that moves `source` to `destination`.

    The command is returned as a string rather than executed, so that the
    renaming appears in the Snakemake logs once it is run with the other
    command lines.

    Both paths are converted to `str` and shell-quoted: Agat produces
    names with {, }, <, >, or -, and paths may contain spaces. The
    Snakemake `:q` syntax is not usable here, hence `shlex.quote`.
    """
    quoted_source, quoted_destination = (
        shlex.quote(str(path)) for path in (source, destination)
    )
    return " ".join(("mv", "--verbose", quoted_source, quoted_destination))
def move_multiple_files(expected_files, snakemake_output):
    """
    Yield one rename command per output file the user asked for.

    `expected_files` maps an output key to the path where the command
    writes that file. For each key, `snakemake_output.get(key)` is looked
    up; when it returns a non-empty value, the matching `shell_rename`
    command line is yielded. Keys the user did not request are skipped.
    (Approach suggested in snakemake-wrappers issue #3976.)
    """
    for key, produced_path in expected_files.items():
        requested_path = snakemake_output.get(key)
        if not requested_path:
            continue
        yield shell_rename(produced_path, requested_path)
def get_gff_basename(snake_input):
    """
    Return the base name of the GFF/GTF input file.

    Many Agat commands base their output file name on the GFF/GTF input
    file name. The input file is searched in `snake_input` under the keys
    "f", "ref", "reffile", "gff", "gff3" and "gtf", in that order; the
    first key holding a non-empty value wins.

    Parameters:
        snake_input: mapping-like object exposing `.get(key)`, typically
            `snakemake.input`.

    Returns:
        The file name of the GFF/GTF input, without its directory.

    Raises:
        KeyError: if none of the known keys holds a value.
    """
    gff_keys = ("f", "ref", "reffile", "gff", "gff3", "gtf")
    for key in gff_keys:
        # Read from the argument, not from the global `snakemake` object,
        # so the function honours whatever mapping the caller passes.
        gff = snake_input.get(key)
        if gff:
            return os.path.basename(gff)

    # No GFF/GTF was found under any of the known keys
    raise KeyError(
        "A GFF/GTF should be provided using one of "
        f"{gff_keys} key in snakemake rule."
    )
def find_arg_value(extra, param_name, default):
    """
    Return the value given to a parameter in a command line string.

    Some commands name their output files with suffixes depending on a
    parameter value. Note that agat does not allow the `--param=value`
    scheme, and only accepts `--param value`, so the value is the token
    that follows the parameter name.

    Parameters:
        extra: command line fragment to search.
        param_name: a parameter name, or a tuple of aliases
            (e.g. `("--size", "-s")`).
        default: value returned when the parameter is absent, or when it
            is the last token and therefore has no value.

    Returns:
        The token following the first occurrence of one of the names,
        with shell quoting removed, or `default`.
    """
    if isinstance(param_name, str):
        param_name = (param_name,)

    # Tokenize like a shell would, so a quoted value such as `'>='` is
    # returned without its quotes. Fall back to whitespace splitting if
    # the string cannot be parsed (e.g. unbalanced quotes).
    try:
        tokens = shlex.split(extra)
    except ValueError:
        tokens = extra.split()

    # The last token cannot be followed by a value: skip it to avoid an
    # IndexError when a flag ends the command line.
    for index, token in enumerate(tokens[:-1]):
        if token in param_name:
            return tokens[index + 1]

    # Case parameter is not in the provided `extra`
    return default
# Most agat commands end with ".pl", but some do not (e.g. `agat config`).
# The ".pl" suffix is therefore never added automatically: the user must
# provide the exact agat command/script name.
command = snakemake.params.get("command")
if command is None:
    raise ValueError(
        "An agat script name or subcommand "
        "should be given through the snakemake rule params."
    )

if command in (
    "agat_sp_compare_two_BUSCOs.pl",
    "agat_sp_filter_by_mrnaBlastValue.pl",
    "agat_sp_load_function_from_protein_align.pl",
):
    warnings.warn(f"The command `{command}` was not tested in Agat Snakemake-Wrappers.")

# Optional parameters, empty by default
extra = snakemake.params.get("extra", "")

if command == "agat_convert_minimap2_bam2gff.pl":
    # This command accepts either `-i` or `--input` for its alignment file.
    # `input` being a protected keyword in Snakemake, `i` is the only key
    # that can point to that file. The format flag is deduced from the
    # file name ending.
    input_file = str(snakemake.input["i"])
    if input_file.endswith("bam"):
        extra = f"{extra} --bam "
    elif input_file.endswith("sam"):
        extra = f"{extra} --sam "
# Dispatch on `command`: build the command line(s) and run them.
# Some commands produce file(s) with fixed names. To avoid collisions
# during possible concurrent execution, these commands will be executed in
# a temporary directory.
# The generic case is at the end of the script, let's deal with
# specific cases first.
with TemporaryDirectory() as tempdir:
# Access/modify configuration files
if command in ("config", "levels"):
# Special case: output file name cannot be chosen freely
yaml_file = "agat_config.yaml" if command == "config" else "feature_levels.yaml"
join_and_run_commands(
[
# Move to tempdir and execute agat command
f"cd {tempdir}",
f"agat {command} --expose",
"cd -",
# Make results available where user expects them
# NOTE(review): the whole `snakemake.output` object is passed here,
# not a single entry; presumably fine with one output file (confirm).
shell_rename(f"{tempdir}/{yaml_file}", snakemake.output),
]
)
elif command == "agat_sp_extract_attributes.pl":
# Special case: output file names have a fixed suffix.
# In order to let user choose output file name freely, we must
# move the results at the end of the command execution.
# The argument `--att` contains a single attribute, or a comma-separated
# list of attributes that will be used as file extension.
# e.g. `--att Parents,ID --out prefix` that will produce both
# `prefix_Parents` and `prefix_ID` files.
# We need to identify prefixes in order to rename output files correctly.
# To do so, we'll use the keys in the output section of the Snakemake
# rule to build the command line and link a result to its correct name.
att = " --att "
for argvalue in snakemake.output.keys():
if att.endswith(" "):
# Deal with first attribute
att += str(argvalue)
else:
# Deal with optional additional attribute(s)
att += f",{argvalue}"
basename = f"{tempdir}/snake_out_prefix"
extra += f" {att} --out {basename} "
extra += parse_args(snakemake.input)
# Get GFF
# NOTE(review): `gff` is assigned here but not used afterwards in this
# branch; the input files already reach the command through `parse_args`.
gff = snakemake.input.get("g")
if not gff:
gff = snakemake.input.get("gff")
command_lines = [f"{command} {extra}"]
for suffix, path in snakemake.output.items():
# Make results available where user expects them
command_lines.append(shell_rename(f"{basename}_{suffix}", path))
join_and_run_commands(command_lines)
elif command == "agat_sp_filter_by_ORF_size.pl":
# Special case: This command returns a pair of files:
# 1. All genic features with an ORF that satisfies command line criteria
# 2. Rest of the genic features (aka. NOT satisfying command line criteria)
# Output file extensions are defined according to command line content.
# Add a known prefix to output files
prefix = f"{tempdir}/snake_out_ORF"
extra += f" --output {prefix} "
extra += parse_args(snakemake.input)
command_lines = [f"{command} {extra}"]
# Output file extension can be predicted from command line:
test = "sup" # Default test value is >
test_value = find_arg_value(
extra=extra,
param_name=("--test", "-t"),
default=">",
)
# Either over or inferior
if ">" in test_value:
test = "sup"
elif "<" in test_value:
test = "inf"
# May be equal or over (sup=) or equal or inferior (inf=)
if "=" in test_value:
test += "="
size = find_arg_value(
extra=extra,
param_name=("--size", "-s"),
default="100",
)
# Warning, `test` holds bash interpretable characters
# use single quotes to move them
matched = f"{prefix}_{test}{size}.gff"
unmatched = f"{prefix}_NOT_{test}{size}.gff"
# Make output files available for user
snake_matched = snakemake.output.get("matched")
if snake_matched:
command_lines.append(shell_rename(matched, snake_matched))
snake_unmatched = snakemake.output.get("unmatched")
if snake_unmatched:
command_lines.append(shell_rename(unmatched, snake_unmatched))
join_and_run_commands(command_lines)
elif command in ("agat_sp_fix_fusion.pl", "agat_sp_fix_longest_ORF.pl"):
# Special case: 4 output files with forced suffixes to handle.
# The same suffixes are applied to both subcommands
prefix = f"{tempdir}/snake_out_fix_fusion"
extra += f" --output {prefix} "
extra += parse_args(snakemake.input)
command_lines = [f"{command} {extra}"]
# Make output file available on user request
for expected_output in ("all", "intact", "only_modified", "report"):
snake_out = snakemake.output.get(expected_output)
if snake_out:
ext = "txt" if expected_output == "report" else "gff"
# Protect bash-interpretable characters in file names
command_lines.append(
shell_rename(f"{prefix}-{expected_output}.{ext}", snake_out)
)
join_and_run_commands(command_lines)
elif command == "agat_sp_manage_UTRs.pl":
# Special case. This command produces between 2 and 4 files, depending on
# command line parameters.
# This command does not parse output path name like the others. Only the basename
# of the output path provided in `--output` is kept. This leads to potential
# output file name collision: we need to move into the tempdir to execute
# the command line.
command_lines = []
prefix = f"snake_out_manage_utr"
extra += f" --output {shlex.quote(prefix)} "
# Activate histograms on user request
if any(path.endswith(".pdf") for path in snakemake.output):
extra += " --plot "
# Link every input file into the tempdir; files ending with `.yaml` are
# passed as `--config`, all the other ones as `--ref`.
for input_file_path in snakemake.input:
arg = "--ref"
if input_file_path.endswith(".yaml"):
arg = "--config"
tempname = f"{tempdir}/{os.path.basename(input_file_path)}"
command_lines.append(
"ln --symbolic --force --relative --verbose "
f"{shlex.quote(str(input_file_path))} "
f"{shlex.quote(tempname)} "
)
extra += f" {arg} {shlex.quote(tempname)} "
command_lines += [
f"cd {tempdir}",
f"{command} {extra}",
f"cd -",
]
# Default threshold value (used in output suffix)
threshold = find_arg_value(
extra=extra,
param_name=("-n", "-g", "--nb", "--number"),
default="5",
)
# Make output file(s) available on user request
expected_output_files = {
"five_prime_utr_overORequal": f"{tempdir}/{prefix}/five_prime_utr_overORequal{threshold}.pdf",
"five_prime_utr_under": f"{tempdir}/{prefix}/five_prime_utr_under{threshold}.pdf",
"three_prime_utr_overORequal": f"{tempdir}/{prefix}/three_prime_utr_overORequal{threshold}.pdf",
"three_prime_utr_under": f"{tempdir}/{prefix}/three_prime_utr_under{threshold}.pdf",
"both_utr_overORequal": f"{tempdir}/{prefix}/1_UTR3_overORequal{threshold}_and_UTR5_overORequal{threshold}.gff",
"both_utr_under": f"{tempdir}/{prefix}/1_UTR3_under{threshold}_and_UTR5_under{threshold}.gff",
"report": f"{tempdir}/{prefix}/report.txt",
}
command_lines += list(
move_multiple_files(expected_output_files, snakemake.output)
)
join_and_run_commands(command_lines)
elif command == "agat_sp_manage_introns.pl":
# Special case: 1 to 5 files are created depending on command line
# parameters. Quite similar to the previous case, yet the list
# of output files differs.
# Build command line
if any(path.endswith(".pdf") for path in snakemake.output):
extra += " --plot "
extra += parse_args(snakemake.input)
# The subdir is required here, since the output path cannot end with "/" or
# be a directory. Or else, an error is raised with output files not being
# at their expected location
extra += f" --output {tempdir}/snake_out "
command_lines = [f"{command} {extra}"]
expected_output_files = {
"cds_pdf": f"{tempdir}/snake_out/intronPlot_cds.pdf",
"exon_pdf": f"{tempdir}/snake_out/intronPlot_exon.pdf",
"five_prime_utr_pdf": f"{tempdir}/snake_out/intronPlot_five_prime_utr.pdf",
"three_prime_utr_pdf": f"{tempdir}/snake_out/intronPlot_three_prime_utr.pdf",
"report": f"{tempdir}/snake_out/report.txt",
}
command_lines += list(
move_multiple_files(expected_output_files, snakemake.output)
)
join_and_run_commands(command_lines)
elif command == "agat_sp_manage_functional_annotation.pl":
# Special case: Multiple optional output files, 3 to 10 files are
# created by this command depending on the available input.
# Build command line
prefix = f"{tempdir}/snake_out"
extra += parse_args(snakemake.input)
extra += f" --output {prefix} "
command_lines = [f"{command} {extra}"]
# Make output available on user request
gff_basename = get_gff_basename(snakemake.input)
expected_output_files = {
"gff": f"{prefix}/{gff_basename}",
"report": f"{prefix}/report.txt",
"error": f"{prefix}/error.txt",
"duplicates": f"{prefix}/duplicatedNameFromBlast.txt",
"cdd": f"{prefix}/CDD.txt",
"go": f"{prefix}/GO.txt",
"interpro": f"{prefix}/InterPro.txt",
"mobidb": f"{prefix}/MobiDBLite.txt",
"panther": f"{prefix}/PANTHER.txt",
"superfamily": f"{prefix}/SUPERFAMILY.txt",
}
command_lines += list(
move_multiple_files(expected_output_files, snakemake.output)
)
join_and_run_commands(command_lines)
elif command == "agat_sp_statistics.pl":
# Special case: 1 to 2 files depending on command line and a whole
# directory based on the content of the input file.
# Build command line
prefix = f"{tempdir}/snake_out"
extra += parse_args(snakemake.input)
extra += f" --output {prefix} "
if snakemake.output.get("yaml"):
extra += " --yaml "
if snakemake.output.get("plot"):
extra += " -p "
command_lines = [f"{command} {extra}"]
# Make output available on user request
expected_output_files = {
"report": prefix,
"yaml": f"{prefix}.yaml",
"plot": f"{prefix}_distribution_plots",
}
command_lines += list(
move_multiple_files(expected_output_files, snakemake.output)
)
join_and_run_commands(command_lines)
elif command == "agat_convert_sp_gff2zff.pl":
# Special case: 2 files created
# Build command line
prefix = f"{tempdir}/snake_out"
extra += f" --output {prefix} "
# Agat uses only the basename of prefixes, we have to move into
# the temporary directory to avoid filename collisions
command_lines = []
for argname, input_file_path in dict(snakemake.input).items():
basename = os.path.basename(input_file_path)
tempname = f"{tempdir}/{basename}"
command_lines.append(
"ln --symbolic --force --relative --verbose "
f"{shlex.quote(input_file_path)} {shlex.quote(tempname)}"
)
# One-letter keys take a single dash, longer keys a double dash
dash = "-"
if len(argname) > 1:
dash = "--"
extra += f" {dash}{argname} {shlex.quote(basename)} "
command_lines += [
f"cd {tempdir}",
f"{command} {extra}",
"cd -",
]
# Make output available on user request
expected_output_files = {
"ann": f"{prefix}.ann",
"dna": f"{prefix}.dna",
}
command_lines += list(
move_multiple_files(expected_output_files, snakemake.output)
)
join_and_run_commands(command_lines)
else:
# Generic case, will work for most of the agat subcommands.
# While subcommands usually answer the same command line interface,
# some of them have unexpected changes in argument names.
# IO arguments will be acquired from snakemake rule IO keys.
# An output key identical to an input key replaces it in this mapping.
io = {**snakemake.input, **snakemake.output}
extra += parse_args(io)
# Special case of agat_sq_add_attributes_from_tsv.pl in
# which TSV/CSV format is not auto-detected and therefore
# an extra parameter shall be added.
# Here, we use input file extension to fill command line argument.
if command == "agat_sq_add_attributes_from_tsv.pl":
if str(snakemake.input.tsv).endswith("csv"):
extra += " --csv "
join_and_run_commands([f"{command} {extra}"])