From 82d85e2d7c7ca5a8e48eb24cfbb9dcc71b8b9471 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 11 Jul 2024 10:06:16 +0100 Subject: [PATCH 1/3] Add support for merging ions. [closes #312] --- python/BioSimSpace/Align/_align.py | 272 +++++++++++------- .../Sandpit/Exscientia/Align/_align.py | 272 +++++++++++------- 2 files changed, 322 insertions(+), 222 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 1db09c51a..90839464f 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -1547,22 +1547,47 @@ def _rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_ma mol0 = molecule0._getSireObject() mol1 = molecule1._getSireObject() + # Do we have a monatomic ion? + if (molecule0.nAtoms() == 1) or (molecule1.nAtoms() == 1): + is_ion = True + else: + is_ion = False + # Convert the mapping to AtomIdx key:value pairs. sire_mapping = _to_sire_mapping(mapping) # Perform the alignment, mol0 to mol1. - try: + if len(mapping) == 1 and is_ion: + idx0 = list(mapping.keys())[0] + idx1 = list(mapping.values())[0] + # Replace the coordinates of the mapped atom with those of the reference. mol0 = ( - mol0.move().align(mol1, _SireMol.AtomResultMatcher(sire_mapping)).molecule() + mol0.edit() + .atom(idx0) + .setProperty( + property_map0.get("coordinates", "coordinates"), + mol1.atom(idx1).property( + property_map1.get("coordinates", "coordinates") + ), + ) + .molecule() + .commit() ) - except Exception as e: - msg = "Failed to align molecules based on mapping: %r" % mapping - if "Could not calculate the single value decomposition" in str(e): - msg += ". Try minimising your molecular coordinates prior to alignment." - if _isVerbose(): - raise _AlignmentError(msg) from e - else: - raise _AlignmentError(msg) from None + else: + try: + mol0 = ( + mol0.move() + .align(mol1, _SireMol.AtomResultMatcher(sire_mapping)) + .molecule() + ) + except Exception as e: + msg = "Failed to align molecules based on mapping: %r" % mapping + if "Could not calculate the single value decomposition" in str(e): + msg += ". Try minimising your molecular coordinates prior to alignment." + if _isVerbose(): + raise _AlignmentError(msg) from e + else: + raise _AlignmentError(msg) from None # Return the aligned molecule. return _Molecule(mol0) @@ -2509,6 +2534,12 @@ def _score_rdkit_mappings( .commit() ) + # Do we have a monatomic ion? + if (molecule0.nAtoms() == 1) or (molecule1.nAtoms() == 1): + is_ion = True + else: + is_ion = False + # Get the set of matching substructures in each molecule. For some reason # setting uniquify to True removes valid matches, in some cases even the # best match! As such, we set uniquify to False and account for duplicate @@ -2575,61 +2606,68 @@ def _score_rdkit_mappings( break if is_valid: - # Rigidly align molecule0 to molecule1 based on the mapping. - if scoring_function == "RMSDALIGN": - try: - molecule0 = ( - molecule0.move() - .align( - molecule1, _SireMol.AtomResultMatcher(sire_mapping) - ) - .molecule() - ) - except Exception as e: - if ( - "Could not calculate the single value decomposition" - in str(e) - ): - is_gsl_error = True - gsl_exception = e - else: - msg = ( - "Failed to align molecules when scoring based on mapping: %r" - % mapping + # If there is only a single atom in the mapping and one molecule + # has one atom, e.g. an ion, then skip the alignment. + if len(mapping) == 1 and is_ion: + mappings.append(mapping) + scores.append(0.0) + else: + # Rigidly align molecule0 to molecule1 based on the mapping. + if scoring_function == "RMSDALIGN": + try: + molecule0 = ( + molecule0.move() + .align( + molecule1, + _SireMol.AtomResultMatcher(sire_mapping), + ) + .molecule() ) - if _isVerbose(): - raise _AlignmentError(msg) from e + except Exception as e: + if ( + "Could not calculate the single value decomposition" + in str(e) + ): + is_gsl_error = True + gsl_exception = e else: - raise _AlignmentError(msg) from None - # Flexibly align molecule0 to molecule1 based on the mapping. - elif scoring_function == "RMSDFLEXALIGN": - molecule0 = flexAlign( - _Molecule(molecule0), - _Molecule(molecule1), - mapping, - property_map0=property_map0, - property_map1=property_map1, - )._sire_object - - # Append the mapping to the list. - mappings.append(mapping) - - # We now compute the RMSD between the coordinates of the matched atoms - # in molecule0 and molecule1. - - # Initialise lists to hold the coordinates. - c0 = [] - c1 = [] - - # Loop over each atom index in the map. - for idx0, idx1 in sire_mapping.items(): - # Append the coordinates of the matched atom in molecule0. - c0.append(molecule0.atom(idx0).property("coordinates")) - # Append the coordinates of atom in molecule1 to which it maps. - c1.append(molecule1.atom(idx1).property("coordinates")) - - # Compute the RMSD between the two sets of coordinates. - scores.append(_SireMaths.getRMSD(c0, c1)) + msg = ( + "Failed to align molecules when scoring based on mapping: %r" + % mapping + ) + if _isVerbose(): + raise _AlignmentError(msg) from e + else: + raise _AlignmentError(msg) from None + # Flexibly align molecule0 to molecule1 based on the mapping. + elif scoring_function == "RMSDFLEXALIGN": + molecule0 = flexAlign( + _Molecule(molecule0), + _Molecule(molecule1), + mapping, + property_map0=property_map0, + property_map1=property_map1, + )._sire_object + + # Append the mapping to the list. + mappings.append(mapping) + + # We now compute the RMSD between the coordinates of the matched atoms + # in molecule0 and molecule1. + + # Initialise lists to hold the coordinates. + c0 = [] + c1 = [] + + # Loop over each atom index in the map. + for idx0, idx1 in sire_mapping.items(): + # Append the coordinates of the matched atom in molecule0. + c0.append(molecule0.atom(idx0).property("coordinates")) + # Append the coordinates of atom in molecule1 to which it maps. + c1.append(molecule1.atom(idx1).property("coordinates")) + + # Compute the RMSD between the two sets of coordinates. + scores.append(_SireMaths.getRMSD(c0, c1)) # No mappings were found. if len(mappings) == 0: @@ -2732,6 +2770,12 @@ def _score_sire_mappings( .commit() ) + # Do we have a monatomic ion? + if (molecule0.nAtoms() == 1) or (molecule1.nAtoms() == 1): + is_ion = True + else: + is_ion = False + # Initialise a list to hold the mappings. mappings = [] @@ -2751,54 +2795,60 @@ def _score_sire_mappings( break if is_valid: - # Rigidly align molecule0 to molecule1 based on the mapping. - if scoring_function == "RMSDALIGN": - try: - molecule0 = ( - molecule0.move() - .align(molecule1, _SireMol.AtomResultMatcher(mapping)) - .molecule() - ) - except Exception as e: - msg = ( - "Failed to align molecules when scoring based on mapping: %r" - % mapping - ) - if _isVerbose(): - raise _AlignmentError(msg) from e - else: - raise _AlignmentError(msg) from None - # Flexibly align molecule0 to molecule1 based on the mapping. - elif scoring_function == "RMSDFLEXALIGN": - molecule0 = flexAlign( - _Molecule(molecule0), - _Molecule(molecule1), - _from_sire_mapping(mapping), - property_map0=property_map0, - property_map1=property_map1, - )._sire_object - - # Append the mapping to the list. - mapping = _from_sire_mapping(mapping) - mapping = dict(sorted(mapping.items())) - mappings.append(mapping) - - # We now compute the RMSD between the coordinates of the matched atoms - # in molecule0 and molecule1. - - # Initialise lists to hold the coordinates. - c0 = [] - c1 = [] - - # Loop over each atom index in the map. - for idx0, idx1 in mapping.items(): - # Append the coordinates of the matched atom in molecule0. - c0.append(molecule0.atom(idx0).property("coordinates")) - # Append the coordinates of atom in molecule1 to which it maps. - c1.append(molecule1.atom(idx1).property("coordinates")) - - # Compute the RMSD between the two sets of coordinates. - scores.append(_SireMaths.getRMSD(c0, c1)) + # If there is only a single atom in the mapping and one molecule + # has one atom, e.g. an ion, then skip the alignment. + if len(mapping) == 1 and is_ion: + mappings.append(mapping) + scores.append(0.0) + else: + # Rigidly align molecule0 to molecule1 based on the mapping. + if scoring_function == "RMSDALIGN": + try: + molecule0 = ( + molecule0.move() + .align(molecule1, _SireMol.AtomResultMatcher(mapping)) + .molecule() + ) + except Exception as e: + msg = ( + "Failed to align molecules when scoring based on mapping: %r" + % mapping + ) + if _isVerbose(): + raise _AlignmentError(msg) from e + else: + raise _AlignmentError(msg) from None + # Flexibly align molecule0 to molecule1 based on the mapping. + elif scoring_function == "RMSDFLEXALIGN": + molecule0 = flexAlign( + _Molecule(molecule0), + _Molecule(molecule1), + _from_sire_mapping(mapping), + property_map0=property_map0, + property_map1=property_map1, + )._sire_object + + # Append the mapping to the list. + mapping = _from_sire_mapping(mapping) + mapping = dict(sorted(mapping.items())) + mappings.append(mapping) + + # We now compute the RMSD between the coordinates of the matched atoms + # in molecule0 and molecule1. + + # Initialise lists to hold the coordinates. + c0 = [] + c1 = [] + + # Loop over each atom index in the map. + for idx0, idx1 in mapping.items(): + # Append the coordinates of the matched atom in molecule0. + c0.append(molecule0.atom(idx0).property("coordinates")) + # Append the coordinates of atom in molecule1 to which it maps. + c1.append(molecule1.atom(idx1).property("coordinates")) + + # Compute the RMSD between the two sets of coordinates. + scores.append(_SireMaths.getRMSD(c0, c1)) # No mappings were found. if len(mappings) == 0: diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index 27731e95d..6b3522c39 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -1151,19 +1151,44 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map # Convert the mapping to AtomIdx key:value pairs. sire_mapping = _to_sire_mapping(mapping) + # Do we have a monatomic ion? + if (molecule0.nAtoms() == 1) or (molecule1.nAtoms() == 1): + is_ion = True + else: + is_ion = False + # Perform the alignment, mol0 to mol1. - try: + if len(mapping) == 1 and is_ion: + idx0 = list(mapping.keys())[0] + idx1 = list(mapping.values())[0] + # Replace the coordinates of the mapped atom with those of the reference. mol0 = ( - mol0.move().align(mol1, _SireMol.AtomResultMatcher(sire_mapping)).molecule() + mol0.edit() + .atom(idx0) + .setProperty( + property_map0.get("coordinates", "coordinates"), + mol1.atom(idx1).property( + property_map1.get("coordinates", "coordinates") + ), + ) + .molecule() + .commit() ) - except Exception as e: - msg = "Failed to align molecules based on mapping: %r" % mapping - if "Could not calculate the single value decomposition" in str(e): - msg += ". Try minimising your molecular coordinates prior to alignment." - if _isVerbose(): - raise _AlignmentError(msg) from e - else: - raise _AlignmentError(msg) from None + else: + try: + mol0 = ( + mol0.move() + .align(mol1, _SireMol.AtomResultMatcher(sire_mapping)) + .molecule() + ) + except Exception as e: + msg = "Failed to align molecules based on mapping: %r" % mapping + if "Could not calculate the single value decomposition" in str(e): + msg += ". Try minimising your molecular coordinates prior to alignment." + if _isVerbose(): + raise _AlignmentError(msg) from e + else: + raise _AlignmentError(msg) from None # Return the aligned molecule. return _Molecule(mol0) @@ -1760,6 +1785,12 @@ def _score_rdkit_mappings( .commit() ) + # Do we have a monatomic ion? + if (molecule0.nAtoms() == 1) or (molecule1.nAtoms() == 1): + is_ion = True + else: + is_ion = False + # Get the set of matching substructures in each molecule. For some reason # setting uniquify to True removes valid matches, in some cases even the # best match! As such, we set uniquify to False and account for duplicate @@ -1826,61 +1857,68 @@ def _score_rdkit_mappings( break if is_valid: - # Rigidly align molecule0 to molecule1 based on the mapping. - if scoring_function == "RMSDALIGN": - try: - molecule0 = ( - molecule0.move() - .align( - molecule1, _SireMol.AtomResultMatcher(sire_mapping) - ) - .molecule() - ) - except Exception as e: - if ( - "Could not calculate the single value decomposition" - in str(e) - ): - is_gsl_error = True - gsl_exception = e - else: - msg = ( - "Failed to align molecules when scoring based on mapping: %r" - % mapping + # If there is only a single atom in the mapping and one molecule + # has one atom, e.g. an ion, then skip the alignment. + if len(mapping) == 1 and is_ion: + mappings.append(mapping) + scores.append(0.0) + else: + # Rigidly align molecule0 to molecule1 based on the mapping. + if scoring_function == "RMSDALIGN": + try: + molecule0 = ( + molecule0.move() + .align( + molecule1, + _SireMol.AtomResultMatcher(sire_mapping), + ) + .molecule() ) - if _isVerbose(): - raise _AlignmentError(msg) from e + except Exception as e: + if ( + "Could not calculate the single value decomposition" + in str(e) + ): + is_gsl_error = True + gsl_exception = e else: - raise _AlignmentError(msg) from None - # Flexibly align molecule0 to molecule1 based on the mapping. - elif scoring_function == "RMSDFLEXALIGN": - molecule0 = flexAlign( - _Molecule(molecule0), - _Molecule(molecule1), - mapping, - property_map0=property_map0, - property_map1=property_map1, - )._sire_object - - # Append the mapping to the list. - mappings.append(mapping) - - # We now compute the RMSD between the coordinates of the matched atoms - # in molecule0 and molecule1. - - # Initialise lists to hold the coordinates. - c0 = [] - c1 = [] - - # Loop over each atom index in the map. - for idx0, idx1 in sire_mapping.items(): - # Append the coordinates of the matched atom in molecule0. - c0.append(molecule0.atom(idx0).property("coordinates")) - # Append the coordinates of atom in molecule1 to which it maps. - c1.append(molecule1.atom(idx1).property("coordinates")) - - # Compute the RMSD between the two sets of coordinates. - scores.append(_SireMaths.getRMSD(c0, c1)) + msg = ( + "Failed to align molecules when scoring based on mapping: %r" + % mapping + ) + if _isVerbose(): + raise _AlignmentError(msg) from e + else: + raise _AlignmentError(msg) from None + # Flexibly align molecule0 to molecule1 based on the mapping. + elif scoring_function == "RMSDFLEXALIGN": + molecule0 = flexAlign( + _Molecule(molecule0), + _Molecule(molecule1), + mapping, + property_map0=property_map0, + property_map1=property_map1, + )._sire_object + + # Append the mapping to the list. + mappings.append(mapping) + + # We now compute the RMSD between the coordinates of the matched atoms + # in molecule0 and molecule1. + + # Initialise lists to hold the coordinates. + c0 = [] + c1 = [] + + # Loop over each atom index in the map. + for idx0, idx1 in sire_mapping.items(): + # Append the coordinates of the matched atom in molecule0. + c0.append(molecule0.atom(idx0).property("coordinates")) + # Append the coordinates of atom in molecule1 to which it maps. + c1.append(molecule1.atom(idx1).property("coordinates")) + + # Compute the RMSD between the two sets of coordinates. + scores.append(_SireMaths.getRMSD(c0, c1)) # No mappings were found. if len(mappings) == 0: @@ -1983,6 +2021,12 @@ def _score_sire_mappings( .commit() ) + # Do we have a monatomic ion? + if (molecule0.nAtoms() == 1) or (molecule1.nAtoms() == 1): + is_ion = True + else: + is_ion = False + # Initialise a list to hold the mappings. mappings = [] @@ -2002,54 +2046,60 @@ def _score_sire_mappings( break if is_valid: - # Rigidly align molecule0 to molecule1 based on the mapping. - if scoring_function == "RMSDALIGN": - try: - molecule0 = ( - molecule0.move() - .align(molecule1, _SireMol.AtomResultMatcher(mapping)) - .molecule() - ) - except Exception as e: - msg = ( - "Failed to align molecules when scoring based on mapping: %r" - % mapping - ) - if _isVerbose(): - raise _AlignmentError(msg) from e - else: - raise _AlignmentError(msg) from None - # Flexibly align molecule0 to molecule1 based on the mapping. - elif scoring_function == "RMSDFLEXALIGN": - molecule0 = flexAlign( - _Molecule(molecule0), - _Molecule(molecule1), - _from_sire_mapping(mapping), - property_map0=property_map0, - property_map1=property_map1, - )._sire_object - - # Append the mapping to the list. - mapping = _from_sire_mapping(mapping) - mapping = dict(sorted(mapping.items())) - mappings.append(mapping) - - # We now compute the RMSD between the coordinates of the matched atoms - # in molecule0 and molecule1. - - # Initialise lists to hold the coordinates. - c0 = [] - c1 = [] - - # Loop over each atom index in the map. - for idx0, idx1 in mapping.items(): - # Append the coordinates of the matched atom in molecule0. - c0.append(molecule0.atom(idx0).property("coordinates")) - # Append the coordinates of atom in molecule1 to which it maps. - c1.append(molecule1.atom(idx1).property("coordinates")) - - # Compute the RMSD between the two sets of coordinates. - scores.append(_SireMaths.getRMSD(c0, c1)) + # If there is only a single atom in the mapping and one molecule + # has one atom, e.g. an ion, then skip the alignment. + if len(mapping) == 1 and is_ion: + mappings.append(mapping) + scores.append(0.0) + else: + # Rigidly align molecule0 to molecule1 based on the mapping. + if scoring_function == "RMSDALIGN": + try: + molecule0 = ( + molecule0.move() + .align(molecule1, _SireMol.AtomResultMatcher(mapping)) + .molecule() + ) + except Exception as e: + msg = ( + "Failed to align molecules when scoring based on mapping: %r" + % mapping + ) + if _isVerbose(): + raise _AlignmentError(msg) from e + else: + raise _AlignmentError(msg) from None + # Flexibly align molecule0 to molecule1 based on the mapping. + elif scoring_function == "RMSDFLEXALIGN": + molecule0 = flexAlign( + _Molecule(molecule0), + _Molecule(molecule1), + _from_sire_mapping(mapping), + property_map0=property_map0, + property_map1=property_map1, + )._sire_object + + # Append the mapping to the list. + mapping = _from_sire_mapping(mapping) + mapping = dict(sorted(mapping.items())) + mappings.append(mapping) + + # We now compute the RMSD between the coordinates of the matched atoms + # in molecule0 and molecule1. + + # Initialise lists to hold the coordinates. + c0 = [] + c1 = [] + + # Loop over each atom index in the map. + for idx0, idx1 in mapping.items(): + # Append the coordinates of the matched atom in molecule0. + c0.append(molecule0.atom(idx0).property("coordinates")) + # Append the coordinates of atom in molecule1 to which it maps. + c1.append(molecule1.atom(idx1).property("coordinates")) + + # Compute the RMSD between the two sets of coordinates. + scores.append(_SireMaths.getRMSD(c0, c1)) # No mappings were found. if len(mappings) == 0: From c8246ed1c7cea18bf184822e3370c5e6d36e117e Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 11 Jul 2024 10:42:45 +0100 Subject: [PATCH 2/3] Add unit test for ion merge. --- tests/Align/test_align.py | 22 +++++++++++++++++ tests/Sandpit/Exscientia/Align/test_align.py | 25 ++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index 341e4bf4f..44c53a74c 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -673,3 +673,25 @@ def test_roi_merge(protein_inputs): merged = BSS.Align.merge(aligned_p0, p1, protein_mapping, roi=roi) merged_system = merged.toSystem() assert merged_system.nPerturbableMolecules() == 1 + + +def test_ion_merge(system): + from sire.legacy.IO import createSodiumIon + + # Extract a water molecule. + water = system[-1] + + # Create a sodium ion using the water coordinates. + ion = createSodiumIon( + water.getAtoms()[0]._sire_object.property("coordinates"), "tip3p" + ) + + # Merge the water and ion. + merged = BSS.Align.merge(water, BSS._SireWrappers.Molecule(ion)) + + # Make sure the ion has the coordintes of the oxygen atom. + coords0 = merged._sire_object.property("coordinates0").toVector()[0] + coords1 = merged._sire_object.property("coordinates1").toVector()[0] + water_coords = water._sire_object.property("coordinates").toVector()[0] + assert coords0 == coords1 + assert coords0 == water_coords diff --git a/tests/Sandpit/Exscientia/Align/test_align.py b/tests/Sandpit/Exscientia/Align/test_align.py index d493b1624..12d8e68a4 100644 --- a/tests/Sandpit/Exscientia/Align/test_align.py +++ b/tests/Sandpit/Exscientia/Align/test_align.py @@ -725,3 +725,28 @@ def test_hydrogen_mass_repartitioning(): assert mass0 == masses1[idx] for idx, mass1 in dummy_masses1: assert mass1 == masses0[idx] + + +def test_ion_merge(): + from sire.legacy.IO import createSodiumIon + from tests.conftest import root_fp + + # Extract a water molecule from the system. + water = BSS.IO.readMolecules( + [f"{root_fp}/input/ala.crd", f"{root_fp}/input/ala.top"] + )[-1] + + # Create a sodium ion using the water coordinates. + ion = createSodiumIon( + water.getAtoms()[0]._sire_object.property("coordinates"), "tip3p" + ) + + # Merge the water and ion. + merged = BSS.Align.merge(water, BSS._SireWrappers.Molecule(ion)) + + # Make sure the ion has the coordintes of the oxygen atom. + coords0 = merged._sire_object.property("coordinates0").toVector()[0] + coords1 = merged._sire_object.property("coordinates1").toVector()[0] + water_coords = water._sire_object.property("coordinates").toVector()[0] + assert coords0 == coords1 + assert coords0 == water_coords From edadeeec3967bcf4d53864a732bb71ac9cc38f41 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 11 Jul 2024 13:05:26 +0100 Subject: [PATCH 3/3] Exclude sander.LES from allowed executables. --- python/BioSimSpace/Process/_amber.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/BioSimSpace/Process/_amber.py b/python/BioSimSpace/Process/_amber.py index 8140729a4..f9b5e21ca 100644 --- a/python/BioSimSpace/Process/_amber.py +++ b/python/BioSimSpace/Process/_amber.py @@ -2856,9 +2856,11 @@ def is_exe(fpath): # order their path accordingly, or use the exe keyword argument. if results: for exe in results: - exe = _pathlib.Path(p) / exe - if is_exe(exe): - return str(exe) + # Exclude "locally enhanced sampling" executables. + if "LES" not in exe: + exe = _pathlib.Path(p) / exe + if is_exe(exe): + return str(exe) msg = ( "'BioSimSpace.Process.Amber' is not supported. "