Skip to content

Commit

Permalink
Merge pull request #517 from zargham-ahmad/issue507
Browse files Browse the repository at this point in the history
Implemented dedicated require filters matchms tool
  • Loading branch information
hechth authored May 30, 2024
2 parents bc3445f + 0efb53d commit 113433b
Show file tree
Hide file tree
Showing 13 changed files with 800 additions and 116 deletions.
24 changes: 1 addition & 23 deletions tools/matchms/matchms_filtering.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy1" profile="21.09">
<description>filter and normalize mass spectrometry data</description>

<macros>
Expand Down Expand Up @@ -49,12 +49,6 @@
--from_mz "$mz_range.from_mz" \
--to_mz "$mz_range.to_mz" \
#end if
#if $require_smiles_is_true == "TRUE"
-require_smiles \
#end if
#if $require_inchi_is_true == "TRUE"
-require_inchi \
#end if
#if $derive_precursor_mz_from_parent_mass.is_true == "TRUE"
-derive_precursor_mz_from_parent_mass \
--estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \
Expand Down Expand Up @@ -102,12 +96,6 @@
<when value="FALSE"></when>
</conditional>

<param name="require_smiles_is_true" label="Require SMILES" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
help="Remove spectra that does not contain SMILES." />

<param name="require_inchi_is_true" label="Require INCHI" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
help="Remove spectra that does not contain INCHI." />

<conditional name="derive_precursor_mz_from_parent_mass">
<param name="is_true" label="Derive precursor_mz from parent_mass" type="select"
help="Derives the precursor_mz from the parent mass and adduct or charge.">
Expand Down Expand Up @@ -172,16 +160,6 @@
</section>
<output name="output" file="filtering/mz_range.msp" ftype="msp"/>
</test>
<test>
<param name="spectra" value="filtering/require_filter.msp" ftype="msp"/>
<param name="require_smiles_is_true" value="TRUE"/>
<output name="output" file="filtering/require_out.msp" ftype="msp"/>
</test>
<test>
<param name="spectra" value="filtering/require_filter.msp" ftype="msp"/>
<param name="require_inchi_is_true" value="TRUE"/>
<output name="output" file="filtering/require_out.msp" ftype="msp"/>
</test>
<test>
<param name="spectra" value="filtering/input.msp" ftype="msp"/>
<section name="reduce_to_top_n_peaks">
Expand Down
21 changes: 0 additions & 21 deletions tools/matchms/matchms_filtering_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,9 @@
from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
select_by_relative_intensity
from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass
from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import is_valid_inchi, is_valid_smiles
from matchms.importing import load_from_mgf, load_from_msp


def require_key(spectrum, key, function):
    """Keep a spectrum only if one of its metadata values passes a validator.

    Args:
        spectrum: Object exposing ``get(key)`` for metadata lookup.
        key: Name of the metadata field to look up.
        function: Callable applied to the looked-up value; a truthy result
            means the spectrum is kept.

    Returns:
        The unchanged ``spectrum`` when ``function(spectrum.get(key))`` is
        truthy, otherwise ``None``.
    """
    value = spectrum.get(key)
    if function(value):
        return spectrum

    return None


def main(argv):
parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.")
Expand All @@ -38,10 +29,6 @@ def main(argv):
help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).")
parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter")
parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter")
parser.add_argument("-require_smiles", action='store_true',
help="Remove spectra that does not contain SMILES.")
parser.add_argument("-require_inchi", action='store_true',
help="Remove spectra that does not contain INCHI.")
parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true',
help="Derives the precursor_mz from the parent mass and adduct or charge.")
parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.")
Expand All @@ -55,8 +42,6 @@ def main(argv):
or args.clean_metadata
or args.relative_intensity
or args.mz_range
or args.require_smiles
or args.require_inchi
or args.derive_precursor_mz_from_parent_mass
or args.reduce_to_top_n_peaks):
raise ValueError('No filter selected.')
Expand Down Expand Up @@ -96,12 +81,6 @@ def main(argv):
precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct)
spectrum.set("precursor_mz", precursor_mz)

if args.require_smiles and spectrum is not None:
spectrum = require_key(spectrum, "smiles", is_valid_smiles)

if args.require_inchi and spectrum is not None:
spectrum = require_key(spectrum, "inchi", is_valid_inchi)

if spectrum is not None:
filtered_spectra.append(spectrum)

Expand Down
197 changes: 197 additions & 0 deletions tools/matchms/matchms_remove_spectra.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
<tool id="matchms_remove_spectra" name="matchms remove spectra" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<description>Filters spectra based on metadata presence</description>

    <!-- Shared definitions (tokens and the macros expanded below) are imported from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="creator"/>

    <!-- EDAM annotation; operation_3695 is presumably the "Filtering" operation (confirm against EDAM). -->
    <edam_operations>
        <edam_operation>operation_3695</edam_operation>
    </edam_operations>

    <expand macro="bio.tools"/>

    <!-- matchms is the only declared package dependency; its version comes from the @TOOL_VERSION@ token. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
    </requirements>

    <!-- The job runs the Python script rendered from the "filter_spectra" configfile. -->
    <command detect_errors="exit_code"><![CDATA[
        python3 '${filter_spectra}'
    ]]></command>

<configfiles>
    <!--
        Python script rendered by Cheetah and executed by the command block.
        It loads the input spectra, keeps those that satisfy ALL selected
        metadata requirements, and writes kept (and optionally removed)
        spectra in the same format as the input.
        The script body starts at column 0 because Python is indentation sensitive.
    -->
    <configfile name="filter_spectra"><![CDATA[
from matchms.exporting import save_as_mgf, save_as_msp
from matchms.filtering import (require_compound_name, require_formula, require_precursor_mz,
                               require_retention_time, require_retention_index, require_valid_annotation)
from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import (is_valid_inchi, is_valid_inchikey,
                                                                             is_valid_smiles)
from matchms.importing import load_from_mgf, load_from_msp

## A multiple select renders as a comma separated string of option values.
## Splitting it into a set gives exact membership tests, so selecting only
## "inchikey" no longer also matches "inchi" as a substring.
## "None" and the empty string stand for "nothing selected" and are dropped.
required_metadata = set("${metadata_fields}".split(",")) - {"None", ""}
output_removed_requested = "$spectra_removed" == "TRUE"
file_format = "$spectra.ext"

if file_format == "msp":
    spectra = list(load_from_msp("${spectra}"))
elif file_format == "mgf":
    spectra = list(load_from_mgf("${spectra}"))
else:
    raise ValueError(f'File format {file_format} not supported for mass spectra file.')


def _valid_value(key, validator):
    """Build a check that validates a single metadata value of a spectrum."""
    return lambda spectrum: bool(validator(spectrum.get(key)))


def _passes(require_filter):
    """Build a check from a matchms require filter, which returns None on failure."""
    return lambda spectrum: require_filter(spectrum) is not None


CHECKS = {
    "smiles": _valid_value("smiles", is_valid_smiles),
    "inchi": _valid_value("inchi", is_valid_inchi),
    "inchikey": _valid_value("inchikey", is_valid_inchikey),
    "precursor_mz": _passes(require_precursor_mz),
    "valid_annotation": _passes(require_valid_annotation),
    "formula": _passes(require_formula),
    "compound_name": _passes(require_compound_name),
    "retention_time": _passes(require_retention_time),
    "retention_index": _passes(require_retention_index),
}
active_checks = [check for name, check in CHECKS.items() if name in required_metadata]

filtered_spectra = []
removed_spectra = []
for spectrum in spectra:
    ## A spectrum is kept only when every selected requirement holds. The
    ## decision is computed per spectrum, so one failed check can no longer
    ## be overwritten by a later check or leak in from the previous spectrum.
    if all(check(spectrum) for check in active_checks):
        filtered_spectra.append(spectrum)
    elif output_removed_requested:
        removed_spectra.append(spectrum)

save = save_as_msp if file_format == "msp" else save_as_mgf
save(filtered_spectra, "${output_filtered}")
## The removed spectra file is written only when that output was requested.
if output_removed_requested:
    save(removed_spectra, "${output_removed}")
    ]]></configfile>
</configfiles>

<inputs>
    <!-- Spectra to be filtered; both outputs inherit this dataset's datatype. -->
    <param name="spectra" type="data" format="msp,mgf" label="Input Spectra File" help="Input file containing mass spectra"/>
    <!--
        Metadata requirements a spectrum has to satisfy to be kept.
        Galaxy treats multiple selects as optional unless told otherwise;
        optional="false" forces at least one field to be chosen, because the
        tool has nothing to filter on with an empty selection.
    -->
    <param name="metadata_fields" type="select" multiple="true" optional="false" label="Metadata Fields" help="Select metadata fields required in the spectra">
        <option value="smiles">SMILES</option>
        <option value="inchi">InChI</option>
        <option value="inchikey">InChIKey</option>
        <option value="formula">Formula</option>
        <option value="retention_time">Retention Time</option>
        <option value="retention_index">Retention Index</option>
        <option value="precursor_mz">Precursor MZ</option>
        <option value="valid_annotation">Valid Annotation</option>
        <option value="compound_name">Compound Name</option>
    </param>
    <!-- When checked, the removed spectra are collected and the "output_removed" dataset is created. -->
    <param name="spectra_removed" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Output Removed Spectra" help="Output spectra that were removed due to missing metadata"/>
</inputs>

<outputs>
    <!-- Spectra that were kept; the datatype follows the "spectra" input. -->
    <data name="output_filtered"
          format_source="spectra"
          label="${tool.name} on ${on_string}: Filtered Spectra"/>
    <!-- Spectra that were removed; only created when the "spectra_removed" input is checked. -->
    <data name="output_removed"
          format_source="spectra"
          label="${tool.name} on ${on_string}: Removed Spectra">
        <filter>spectra_removed</filter>
    </data>
</outputs>

<tests>
    <!-- Every test filters the same input file with exactly one required metadata field. -->

    <!-- SMILES required. -->
    <test expect_num_outputs="1">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="smiles"/>
        <output name="output_filtered" file="remove_spectra/require_smiles.msp" ftype="msp"/>
    </test>

    <!-- InChI required; the removed spectra are requested as a second output. -->
    <test expect_num_outputs="2">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="inchi"/>
        <param name="spectra_removed" value="TRUE"/>
        <output name="output_filtered" file="remove_spectra/require_inchi.msp" ftype="msp"/>
        <output name="output_removed" file="remove_spectra/require_inchi_removed_spectra.msp" ftype="msp"/>
    </test>

    <!-- InChIKey required. -->
    <test expect_num_outputs="1">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="inchikey"/>
        <output name="output_filtered" file="remove_spectra/require_inchikey.msp" ftype="msp"/>
    </test>

    <!-- Formula required. -->
    <test expect_num_outputs="1">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="formula"/>
        <output name="output_filtered" file="remove_spectra/require_formula.msp" ftype="msp"/>
    </test>

    <!-- Retention time required. -->
    <test expect_num_outputs="1">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="retention_time"/>
        <output name="output_filtered" file="remove_spectra/require_retention_time.msp" ftype="msp"/>
    </test>

    <!-- Retention index required. -->
    <test expect_num_outputs="1">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="retention_index"/>
        <output name="output_filtered" file="remove_spectra/require_retention_index.msp" ftype="msp"/>
    </test>

    <!-- Precursor m/z required. -->
    <test expect_num_outputs="1">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="precursor_mz"/>
        <output name="output_filtered" file="remove_spectra/require_precursor_mz.msp" ftype="msp"/>
    </test>

    <!-- Compound name required. -->
    <test expect_num_outputs="1">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="compound_name"/>
        <output name="output_filtered" file="remove_spectra/require_compound_name.msp" ftype="msp"/>
    </test>

    <!-- Valid annotation required; the filtered output is expected to be empty. -->
    <test expect_num_outputs="1">
        <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
        <param name="metadata_fields" value="valid_annotation"/>
        <output name="output_filtered">
            <assert_contents>
                <has_n_lines n="0"/>
            </assert_contents>
        </output>
    </test>
</tests>

<help><![CDATA[
This tool filters input mass spectra based on the presence of the selected metadata fields. Spectra missing any of the selected metadata fields are removed and can optionally be written to a separate output.

The Valid Annotation filter removes spectra that are not fully annotated, i.e. spectra that lack correct and mutually matching SMILES, InChI and InChIKey.
]]></help>

<expand macro="citations"/>
</tool>
Loading

0 comments on commit 113433b

Please sign in to comment.