From 09c1d4ab75f5cd057ed57bf694217f0e355f785e Mon Sep 17 00:00:00 2001 From: Rand Zoabi <58028465+RZ9082@users.noreply.github.com> Date: Mon, 11 Nov 2024 11:16:00 +0100 Subject: [PATCH] Cmsearch: added missing args to command (#1328) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added missing args to command * bump version * Bump infernal version, update cmscan test output, and modify cmpress command section * cmsearch version suffix back to 0 * Lint warnings * change mapali_cond option order in cmalign * Lint warning * Fix cmalign * data_table set up * Move DB selector to macros and add cached DB tests * Re-add unintentionally removed tests * Fix cmscan --------- Co-authored-by: Björn Grüning Co-authored-by: paulzierep --- tools/rna_tools/infernal/cmalign.xml | 51 +++++++++---------- tools/rna_tools/infernal/cmbuild.xml | 20 ++++---- tools/rna_tools/infernal/cmpress.xml | 51 ++++++++----------- tools/rna_tools/infernal/cmscan.xml | 51 ++++++++----------- tools/rna_tools/infernal/cmsearch.xml | 34 +++++-------- tools/rna_tools/infernal/cmstat.xml | 34 ++++++------- tools/rna_tools/infernal/macros.xml | 27 ++++++++-- .../rna_tools/infernal/test-data/infernal.loc | 21 ++++++++ .../infernal/test-data/test_cmscan.tabular | 10 ++-- .../infernal/tool_data_table_conf.xml.sample | 7 +++ .../infernal/tool_data_table_conf.xml.test | 7 +++ 11 files changed, 169 insertions(+), 144 deletions(-) create mode 100644 tools/rna_tools/infernal/test-data/infernal.loc create mode 100644 tools/rna_tools/infernal/tool_data_table_conf.xml.sample create mode 100644 tools/rna_tools/infernal/tool_data_table_conf.xml.test diff --git a/tools/rna_tools/infernal/cmalign.xml b/tools/rna_tools/infernal/cmalign.xml index 1f72febe0d..a6ecee7dfb 100644 --- a/tools/rna_tools/infernal/cmalign.xml +++ b/tools/rna_tools/infernal/cmalign.xml @@ -3,9 +3,9 @@ macros.xml + - - - - - - - - - - - - - - - - - - - - - + @@ -105,8 +86,11 @@ - + + + + - + + + + @@ -189,6 +176,18 @@ + + + + + + + + + + + + macros.xml + - align with the CYK algorithm - - - + + + + @@ -325,7 +328,7 @@ - + @@ -338,7 +341,7 @@ - + @@ -422,8 +425,7 @@ These options control how consensus columns are defined in an alignment. cmbuild uses an ad hoc sequence weighting algorithm to downweight closely related sequences and upweight distantly related ones. This has the effect of making models less biased by uneven phylogenetic representation. For example, two identical sequences would typically each receive half the weight that one sequence would. These options control which algorithm gets used. - - *--wgb*: Use the Henikoff position-based sequence weighting scheme [Henikoff -and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default. + - *--wgb*: Use the Henikoff position-based sequence weighting scheme [Henikoff and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default. - *--wgsc*: Use the Gerstein/Sonnhammer/Chothia weighting algorithm [Gerstein et al, J. Mol. Biol. 235:1067, 1994]. - *--wnone*: Turn sequence weighting off; e.g. explicitly set all sequence weights to 1.0. - *--wgiven*: Use sequence weights as given in annotation in the input alignment file. If no weights were given, assume they are all 1.0. The default is to determine new sequence weights by the Gerstein/Sonnhammer/Chothia algorithm, ignoring any annotated weights. diff --git a/tools/rna_tools/infernal/cmpress.xml b/tools/rna_tools/infernal/cmpress.xml index e2525e490a..566d880599 100644 --- a/tools/rna_tools/infernal/cmpress.xml +++ b/tools/rna_tools/infernal/cmpress.xml @@ -1,47 +1,27 @@ - + Prepare a covariance model database for cmscan macros.xml + - - + - - - - - - - - - - - - - - - - - - - + @@ -58,7 +38,16 @@ + + + + + + + + + - + \ No newline at end of file diff --git a/tools/rna_tools/infernal/cmscan.xml b/tools/rna_tools/infernal/cmscan.xml index d90f757cac..b0322c054f 100644 --- a/tools/rna_tools/infernal/cmscan.xml +++ b/tools/rna_tools/infernal/cmscan.xml @@ -3,19 +3,21 @@ macros.xml + - - + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + diff --git a/tools/rna_tools/infernal/cmsearch.xml b/tools/rna_tools/infernal/cmsearch.xml index dce8caaf52..fafed5fbf3 100644 --- a/tools/rna_tools/infernal/cmsearch.xml +++ b/tools/rna_tools/infernal/cmsearch.xml @@ -4,9 +4,9 @@ macros.xml + - - - - - - - - - - - - - - - - - - - - + @@ -241,7 +225,7 @@ - + @@ -249,6 +233,14 @@ + + + + + + + + diff --git a/tools/rna_tools/infernal/cmstat.xml b/tools/rna_tools/infernal/cmstat.xml index 115f45a8fc..f946745618 100644 --- a/tools/rna_tools/infernal/cmstat.xml +++ b/tools/rna_tools/infernal/cmstat.xml @@ -3,9 +3,9 @@ macros.xml + - - - - - - - - - - - - - - - - - - - + @@ -61,6 +44,19 @@ + + + + + + + + + + + + + - infernal - infernal - coreutils + infernal + coreutils - 1.1.4 + 1.1.5 0 infernal + + + + + + + + + + + + + + + + + + + + diff --git a/tools/rna_tools/infernal/test-data/infernal.loc b/tools/rna_tools/infernal/test-data/infernal.loc new file mode 100644 index 0000000000..ca529f3a7a --- /dev/null +++ b/tools/rna_tools/infernal/test-data/infernal.loc @@ -0,0 +1,21 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of infernal covariance models, using three columns tab separated +#(longer whitespace are TAB characters): +# +#The entries are as follows: +# +# +# +#Your infernal.loc file should include an entry per line for each "base name" +#you have stored. For example: +# +#rfam01 RFAM 4.2 08 Aug 2013 /data/0/galaxy_data/infernal/08_08_2013/rfam.cm +# +#...etc... +# + +cmstat_input.cm Test case: cmstat_input ${__HERE__}/cmstat_input.cm +cmsearch_input1.cm Test case: cmsearch_input1 ${__HERE__}/cmsearch_input1.cm +minifam.cm Test case: minifam.cm ${__HERE__}/minifam.cm +cmpress_input_minifam.cm Test case: cmpress_input_minifam.cm ${__HERE__}/cmpress_input_minifam.cm +cmalign_input2_tRNA5.c.cm Test case: cmalign_input2_tRNA5.c.cm ${__HERE__}/cmalign_input2_tRNA5.c.cm diff --git a/tools/rna_tools/infernal/test-data/test_cmscan.tabular b/tools/rna_tools/infernal/test-data/test_cmscan.tabular index cfd749f2b0..c84463a9d4 100644 --- a/tools/rna_tools/infernal/test-data/test_cmscan.tabular +++ b/tools/rna_tools/infernal/test-data/test_cmscan.tabular @@ -5,11 +5,11 @@ tRNA5 - AAGA01015927.1 - cm 1 72 314 386 + no 1 0.59 0.0 53.5 6e-16 ! Cobalamin RF00174 AAFY01022046.1 - cm 32 191 934 832 - 5' 2 0.48 0.0 30.0 6.1e-09 ! Cobalamin riboswitch # # Program: cmscan -# Version: 1.1.4 (Dec 2020) +# Version: 1.1.5 (Sep 2023) # Pipeline mode: SCAN -# Query file: /tmp/tmp6jwp47cs/files/a/6/f/dataset_a6fe922b-72aa-4753-b4dd-cd3c716a4ae5.dat +# Query file: /tmp/tmpst0r7qh9/files/7/3/7/dataset_7373cb0d-0d12-49b5-96e0-e1aabaa6db38.dat # Target file: cmdb.cm -# Option settings: cmscan -o /dev/null --tblout $temp_tabular_output --default --cpu 1 cmdb.cm /tmp/tmp6jwp47cs/files/a/6/f/dataset_a6fe922b-72aa-4753-b4dd-cd3c716a4ae5.dat -# Current dir: /tmp/tmp6jwp47cs/job_working_directory/000/13/working -# Date: Thu Sep 23 15:21:39 2021 +# Option settings: cmscan -o /dev/null --tblout $temp_tabular_output --default --cpu 1 cmdb.cm /tmp/tmpst0r7qh9/files/7/3/7/dataset_7373cb0d-0d12-49b5-96e0-e1aabaa6db38.dat +# Current dir: /tmp/tmpst0r7qh9/job_working_directory/000/4/working +# Date: Thu Oct 17 11:28:32 2024 # [ok] diff --git a/tools/rna_tools/infernal/tool_data_table_conf.xml.sample b/tools/rna_tools/infernal/tool_data_table_conf.xml.sample new file mode 100644 index 0000000000..6f694f25cf --- /dev/null +++ b/tools/rna_tools/infernal/tool_data_table_conf.xml.sample @@ -0,0 +1,7 @@ + + + + value, name, path + +
+
\ No newline at end of file diff --git a/tools/rna_tools/infernal/tool_data_table_conf.xml.test b/tools/rna_tools/infernal/tool_data_table_conf.xml.test new file mode 100644 index 0000000000..5d68755faa --- /dev/null +++ b/tools/rna_tools/infernal/tool_data_table_conf.xml.test @@ -0,0 +1,7 @@ + + + + value, name, path + +
+
\ No newline at end of file