-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #517 from zargham-ahmad/issue507
Implemented dedicated require filters matchms tool
- Loading branch information
Showing
13 changed files
with
800 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
<tool id="matchms_remove_spectra" name="matchms remove spectra" version="@TOOL_VERSION@+galaxy0" profile="21.09"> | ||
<description>Filters spectra based on metadata presence</description> | ||
|
||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
|
||
<expand macro="creator"/> | ||
|
||
<edam_operations> | ||
<edam_operation>operation_3695</edam_operation> | ||
</edam_operations> | ||
|
||
<expand macro="bio.tools"/> | ||
|
||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">matchms</requirement> | ||
</requirements> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
python3 '${filter_spectra}' | ||
]]></command> | ||
|
||
<configfiles> | ||
<configfile name="filter_spectra"> | ||
from matchms.exporting import save_as_mgf, save_as_msp | ||
from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import is_valid_inchi, is_valid_smiles, is_valid_inchikey | ||
from matchms.filtering import require_compound_name, require_formula, require_precursor_mz, \ | ||
require_retention_time, require_retention_index, require_valid_annotation | ||
from matchms.importing import load_from_mgf, load_from_msp | ||
|
||
#set metadata_fields = str("', '").join([str($f) for $f in $metadata_fields]) | ||
required_metadata = "$metadata_fields" | ||
|
||
if "$spectra.ext" == "msp": | ||
spectra = list(load_from_msp("${spectra}")) | ||
elif "$spectra.ext" == 'mgf': | ||
spectra = list(load_from_mgf("${spectra}")) | ||
else: | ||
raise ValueError(f'File format {$spectra.ext} not supported for mass spectra file.') | ||
|
||
filtered_spectra = [] | ||
removed_spectra = [] | ||
keep = False | ||
|
||
for spectrum in spectra: | ||
if 'smiles' in required_metadata: | ||
keep = is_valid_smiles(spectrum.get('smiles')) | ||
|
||
if 'inchi' in required_metadata: | ||
keep = is_valid_inchi(spectrum.get('inchi')) | ||
|
||
if 'inchikey' in required_metadata: | ||
keep = is_valid_inchikey(spectrum.get('inchikey')) | ||
|
||
if 'precursor_mz' in required_metadata: | ||
result = require_precursor_mz(spectrum) | ||
if result is not None: | ||
keep = True | ||
else: | ||
keep = False | ||
|
||
if 'valid_annotation' in required_metadata: | ||
result = require_valid_annotation(spectrum) | ||
if result is not None: | ||
keep = True | ||
else: | ||
keep = False | ||
|
||
if 'formula' in required_metadata: | ||
result = require_formula(spectrum) | ||
if result is not None: | ||
keep = True | ||
else: | ||
keep = False | ||
|
||
if 'compound_name' in required_metadata: | ||
result = require_compound_name(spectrum) | ||
if result is not None: | ||
keep = True | ||
else: | ||
keep = False | ||
|
||
if 'retention_time' in required_metadata: | ||
result = require_retention_time(spectrum) | ||
if result is not None: | ||
keep = True | ||
else: | ||
keep = False | ||
|
||
if 'retention_index' in required_metadata: | ||
result = require_retention_index(spectrum) | ||
if result is not None: | ||
keep = True | ||
else: | ||
keep = False | ||
|
||
if keep: | ||
filtered_spectra.append(spectrum) | ||
|
||
if "$spectra_removed" == "TRUE" and keep == False: | ||
removed_spectra.append(spectrum) | ||
|
||
|
||
if "$spectra.ext" == "msp": | ||
save_as_msp(filtered_spectra, "${output_filtered}") | ||
save_as_msp(removed_spectra, "${output_removed}") | ||
else: | ||
save_as_mgf(filtered_spectra, "${output_filtered}") | ||
save_as_mgf(removed_spectra, "${output_removed}") | ||
|
||
</configfile> | ||
</configfiles> | ||
|
||
<inputs> | ||
<param name="spectra" type="data" format="msp,mgf" label="Input Spectra File" help="Input file containing mass spectra"/> | ||
<param name="metadata_fields" type="select" multiple="true" label="Metadata Fields" help="Select metadata fields required in the spectra"> | ||
<option value="smiles">SMILES</option> | ||
<option value="inchi">InChI</option> | ||
<option value="inchikey">InChIKey</option> | ||
<option value="formula">Formula</option> | ||
<option value="retention_time">Retention Time</option> | ||
<option value="retention_index">Retention Index</option> | ||
<option value="precursor_mz">Precursor MZ</option> | ||
<option value="valid_annotation">Valid Annotation</option> | ||
<option value="compound_name">Compound Name</option> | ||
</param> | ||
<param name="spectra_removed" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Output Removed Spectra" help="Output spectra that were removed due to missing metadata"/> | ||
</inputs> | ||
|
||
<outputs> | ||
<data name="output_filtered" format_source="spectra" label="${tool.name} on ${on_string}: Filtered Spectra"/> | ||
<data name="output_removed" format_source="spectra" label="${tool.name} on ${on_string}: Removed Spectra"> | ||
<filter>spectra_removed</filter> | ||
</data> | ||
</outputs> | ||
|
||
<tests> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="smiles"/> | ||
<output name="output_filtered" file="remove_spectra/require_smiles.msp" ftype="msp"/> | ||
</test> | ||
<test expect_num_outputs="2"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="inchi"/> | ||
<param name="spectra_removed" value="TRUE"/> | ||
<output name="output_filtered" file="remove_spectra/require_inchi.msp" ftype="msp"/> | ||
<output name="output_removed" file="remove_spectra/require_inchi_removed_spectra.msp" ftype="msp"/> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="inchikey"/> | ||
<output name="output_filtered" file="remove_spectra/require_inchikey.msp" ftype="msp"/> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="formula"/> | ||
<output name="output_filtered" file="remove_spectra/require_formula.msp" ftype="msp"/> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="retention_time"/> | ||
<output name="output_filtered" file="remove_spectra/require_retention_time.msp" ftype="msp"/> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="retention_index"/> | ||
<output name="output_filtered" file="remove_spectra/require_retention_index.msp" ftype="msp"/> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="precursor_mz"/> | ||
<output name="output_filtered" file="remove_spectra/require_precursor_mz.msp" ftype="msp"/> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="compound_name"/> | ||
<output name="output_filtered" file="remove_spectra/require_compound_name.msp" ftype="msp"/> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/> | ||
<param name="metadata_fields" value="valid_annotation"/> | ||
<output name="output_filtered"> | ||
<assert_contents> | ||
<has_n_lines n="0"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
|
||
<help><![CDATA[ | ||
This tool filters input mass spectra based on the presence of specified metadata fields. Spectra missing any of the selected metadata fields are optionally logged and output separately. | ||
Valid Annotation filter removes spectra that are not fully annotated (correct and matching, smiles, inchi and inchikey) | ||
]]></help> | ||
|
||
<expand macro="citations"/> | ||
</tool> |
Oops, something went wrong.