-
Notifications
You must be signed in to change notification settings - Fork 436
/
biobox_add_taxid.xml
128 lines (114 loc) · 6.04 KB
/
biobox_add_taxid.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
<tool id="biobox_add_taxid" name="Biobox add taxid" version="@SCRIPT_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<description>Add taxid output from BAT or GTDB to biobox binning data</description>
<macros>
<import>macros.xml</import>
<token name="@SCRIPT_VERSION@">0.6</token>
</macros>
<requirements>
<requirement type="package" version="@SCRIPT_VERSION@">biobox_add_taxid</requirement>
</requirements>
<command detect_errors="exit_code">
<![CDATA[
mkdir -p input taxonkit gtdb_to_taxdump &&
ln -s '${biobox_file}' '$biobox_file.element_identifier' &&
#for $i, $f in enumerate($tool_type.input):
ln -s '$f' 'input/${i}.tsv' &&
#end for
#if $tool_type.is_select == 'GTDB':
#for $i, $f in enumerate($gtdb_to_taxdump):
ln -s '$f' 'gtdb_to_taxdump/gtdb_to_taxdump_${i}.tsv' &&
#end for
#for $i, $f in enumerate($taxonkit):
ln -s '$f' 'taxonkit/taxonkit_${i}.tsv' &&
#end for
#end if
biobox_add_taxid.py
'$biobox_file.element_identifier'
'$tool_type.is_select'
'input'
#if $tool_type.is_select == 'GTDB':
-g 'gtdb_to_taxdump'
-t 'taxonkit'
-c $column
#end if
&&
#if $tool_type.is_select == 'GTDB':
cp *_add_taxid_GTDB* $output
#else:
cp *_add_taxid_BAT* $output
#end if
]]>
</command>
<inputs>
<param name="biobox_file" type="data" format="tabular" label="CAMI amber biobox file input" help="Input the CAMI amber biobox file here which are corespond with the biner and the BAT/GTDB output which did used the biner as input! "/>
<conditional name="tool_type">
<param name="is_select" type="select" label="Select the tool which output should be used here" help="Select BAT when you use the bin2classifier file(s) or select GTDB when using the summary file(s)">
<option value="BAT">BAT</option>
<option value="GTDB">GTDB</option>
</param>
<when value="GTDB">
<param argument="--gtdb_to_taxdump" type="data" multiple="true" format="tabular" label="Input the output from gtdb_to_taxdump here" help="Use the output from gtdb_to_taxdump here since we need the mapped names from GTDB to NCBI to get the arcoding taxids from NCBI"/>
<param argument="--taxonkit" type="data" format="tabular" multiple="true" label="Input the output from Taxonkit here" help="Use the output from Taxonkit here since the need the mapped NCBI names to the arcording taxids"/>
<param argument="--column" type="data_column" data_ref="taxonkit" label="Input the colum with the NCBI names"/>
<param name="input" type="data" multiple="true" format="tabular" label="Input the GTDB-Tk summary file(s) here"
help="Use the GTDB-Tk file(s) which are coresponding with the binning file!"/>
</when>
<when value="BAT">
<param name="input" type="data" multiple="true" format="tabular" label="Input bin2classifier file(s) from BAT here"
help="Use the BAT file(s) which are coresponding with the binning file!"/>
</when>
</conditional>
</inputs>
<outputs>
<data name="output" format="tabular" label="${tool.name}: BIOBOX ADD TAXID COLUMN"/>
</outputs>
<tests>
<test expect_num_outputs="1">
<param name="biobox_file" ftype="tabular" value="test_biobox_file.tsv"/>
<conditional name="tool_type">
<param name="is_select" value="BAT"/>
<param name="input" ftype="tabular" value="test_bat_file.tsv"/>
</conditional>
<output name="output" file="test_biobox_file_add_taxid_bat.tsv"/>
</test>
<test expect_num_outputs="1">
<param name="biobox_file" ftype="tabular" value="test_biobox_file.tsv"/>
<conditional name="tool_type">
<param name="is_select" value="GTDB"/>
<param name="gtdb_to_taxdump" ftype="tabular" value="test_gtdb_to_taxdump_file.tsv"/>
<param name="taxonkit" ftype="tabular" value="test_taxonkit_file_1.tsv,test_taxonkit_file_2.tsv"/>
<param name="column" value="1"/>
<param name="input" ftype="tabular" value="test_gtdb_file.tsv"/>
</conditional>
<output name="output" file="test_biobox_file_add_taxid_gtdb.tsv"/>
</test>
</tests>
<help>
<![CDATA[
**USAGE OF THIS TOOL**
This tool was desgin for using the BAT/GTDB-Tk output to expand the CAMI amber biobox file with the taxid column to access taxa evaluation with amber!
**IMPORTAND NOTE WHEN USING GTDB OUTPUT**
Befor using GTDB-Tk output you have to use 2 other tools to map the GTDB names to the NCBI names and then map the NCBI names to there taxids.
To map the GTDB names to the NCBI names you extract the GTDB names from the summary file and with them you can now use the gtdb_to_taxdum tool!
After using the gtdb_to_taxdum tool you have the extract the NCBI names here and then use the tool Taxonkit[name2taxid] to finaly get the taxids.
**INPUTS**
- The CAMI amber biobox file
- BAT[bin2classifier] file(s) OR GTDB-Tk[Summary] file(s)
- The gtdb_to_taxdum output[WHEN USING GTDB-Tk]
- The Taxonkit[name2taxid] output[WHEN USING GTDB-Tk]
**OUTPUT**
A CAMI amber biobox file with a taxid colum
]]>
</help>
<citations>
<citation type="bibtex">@misc{BibEntry2024Aug,
title = {{biobox{$\_$}add{$\_$}taxid}},
author = {Santino Faack (SantaMcCloud)},
journal = {GitHub},
year = {2024},
month = aug,
note = {[Online; accessed 22. Aug. 2024]},
url = {https://github.com/SantaMcCloud/biobox_add_taxid/tree/release-0.3}
}</citation>
</citations>
</tool>