From afb6b0599437b9dc305ed9b1517e80c0cc00bee3 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Fri, 16 Aug 2024 13:00:33 +0100 Subject: [PATCH] Preserve SMILES molecule properties when parameterising. [closes #330] --- .../Parameters/_Protocol/_amber.py | 110 +++++++----------- .../Parameters/_Protocol/_openforcefield.py | 59 +++++----- .../Exscientia/Parameters/_Protocol/_amber.py | 110 +++++++----------- .../Parameters/_Protocol/_openforcefield.py | 59 +++++----- tests/Parameters/test_parameters.py | 30 +++++ .../Exscientia/Parameters/test_parameters.py | 30 +++++ 6 files changed, 192 insertions(+), 206 deletions(-) diff --git a/python/BioSimSpace/Parameters/_Protocol/_amber.py b/python/BioSimSpace/Parameters/_Protocol/_amber.py index bdddd0851..b554c12ca 100644 --- a/python/BioSimSpace/Parameters/_Protocol/_amber.py +++ b/python/BioSimSpace/Parameters/_Protocol/_amber.py @@ -310,15 +310,24 @@ def run(self, molecule, work_dir=None, queue=None): if work_dir is None: work_dir = _os.getcwd() - # Flag whether the molecule is a SMILES string. + # Try to create a molecule from the SMILES string. if isinstance(molecule, str): - is_smiles = True - else: - is_smiles = False + try: + smiles = molecule + molecule = _smiles(molecule) + edit_mol = molecule._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + molecule._sire_object = edit_mol.commit() + except Exception as e: + msg = "Unable to convert SMILES to Molecule using RDKit." + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise _ThirdPartyError(msg) from e + else: + raise _ThirdPartyError(msg) from None - if not is_smiles: - # Create a copy of the molecule. - new_mol = molecule.copy() + # Create a copy of the molecule. + new_mol = molecule.copy() # Choose the program to run with depending on the force field compatibility. # If tLEaP and pdb2gmx are supported, default to tLEaP, then use pdb2gmx if @@ -328,8 +337,7 @@ def run(self, molecule, work_dir=None, queue=None): if self._tleap: if _tleap_exe is not None: output = self._run_tleap(molecule, str(work_dir)) - if not is_smiles: - new_mol._ion_water_model = self._water_model + new_mol._ion_water_model = self._water_model # Otherwise, try using pdb2gmx. elif self._pdb2gmx: if _gmx_exe is not None: @@ -371,41 +379,23 @@ def run(self, molecule, work_dir=None, queue=None): # Make the molecule 'mol' compatible with 'par_mol'. This will create # a mapping between atom indices in the two molecules and add all of # the new properties from 'par_mol' to 'mol'. - if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(molecule).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = edit_mol.residue(_SireMol.ResIdx(0)).rename(resname).molecule() - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() - + if self._ensure_compatible: + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, + ) else: - if self._ensure_compatible: + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = self._forcefield @@ -1009,9 +999,12 @@ def run(self, molecule, work_dir=None, queue=None): # Convert SMILES to a molecule. if isinstance(molecule, str): - is_smiles = True try: + smiles = molecule new_mol = _smiles(molecule) + edit_mol = new_mol._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + new_mol._sire_object = edit_mol.commit() except Exception as e: msg = "Unable to convert SMILES to Molecule using RDKit." if _isVerbose(): @@ -1020,7 +1013,6 @@ def run(self, molecule, work_dir=None, queue=None): else: raise _ThirdPartyError(msg) from None else: - is_smiles = False new_mol = molecule.copy() # Use the net molecular charge passed as an option. @@ -1251,45 +1243,23 @@ def run(self, molecule, work_dir=None, queue=None): # Make the molecule 'mol' compatible with 'par_mol'. This will create # a mapping between atom indices in the two molecules and add all of # the new properties from 'par_mol' to 'mol'. - if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(molecule).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = ( - edit_mol.residue(_SireMol.ResIdx(0)) - .rename(resname) - .molecule() + if self._ensure_compatible: + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, ) - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() - else: - if self._ensure_compatible: + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = ff diff --git a/python/BioSimSpace/Parameters/_Protocol/_openforcefield.py b/python/BioSimSpace/Parameters/_Protocol/_openforcefield.py index c94e7d349..2b61a0d2f 100644 --- a/python/BioSimSpace/Parameters/_Protocol/_openforcefield.py +++ b/python/BioSimSpace/Parameters/_Protocol/_openforcefield.py @@ -116,6 +116,7 @@ from ... import _isVerbose from ... import IO as _IO +from ...Convert import smiles as _smiles from ..._Exceptions import IncompatibleError as _IncompatibleError from ..._Exceptions import ThirdPartyError as _ThirdPartyError from ..._SireWrappers import Molecule as _Molecule @@ -415,31 +416,31 @@ def run(self, molecule, work_dir=None, queue=None): # Make the parameterised molecule compatible with the original topology. if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - # Since the name is written to topology file formats, we - # need to ensure that it doesn't start with an [ character, - # which would break GROMACS. - name = molecule - if name.startswith("["): - name = f"smiles:{name}" - - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(name).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = edit_mol.residue(_SireMol.ResIdx(0)).rename(resname).molecule() - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() + # Try to create BioSimSpace molecule from the SMILES string. + try: + smiles = molecule + molecule = _smiles(molecule) + edit_mol = molecule._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + molecule._sire_object = edit_mol.commit() + except Exception as e: + msg = "Unable to convert SMILES to Molecule using RDKit." + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise _ThirdPartyError(msg) from e + else: + raise _ThirdPartyError(msg) from None + if self._ensure_compatible: + new_mol = molecule.copy() + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, + ) else: - if self._ensure_compatible: + try: new_mol = molecule.copy() new_mol.makeCompatibleWith( par_mol, @@ -447,16 +448,8 @@ def run(self, molecule, work_dir=None, queue=None): overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = self._forcefield diff --git a/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_amber.py b/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_amber.py index bdddd0851..b554c12ca 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_amber.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_amber.py @@ -310,15 +310,24 @@ def run(self, molecule, work_dir=None, queue=None): if work_dir is None: work_dir = _os.getcwd() - # Flag whether the molecule is a SMILES string. + # Try to create a molecule from the SMILES string. if isinstance(molecule, str): - is_smiles = True - else: - is_smiles = False + try: + smiles = molecule + molecule = _smiles(molecule) + edit_mol = molecule._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + molecule._sire_object = edit_mol.commit() + except Exception as e: + msg = "Unable to convert SMILES to Molecule using RDKit." + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise _ThirdPartyError(msg) from e + else: + raise _ThirdPartyError(msg) from None - if not is_smiles: - # Create a copy of the molecule. - new_mol = molecule.copy() + # Create a copy of the molecule. + new_mol = molecule.copy() # Choose the program to run with depending on the force field compatibility. # If tLEaP and pdb2gmx are supported, default to tLEaP, then use pdb2gmx if @@ -328,8 +337,7 @@ def run(self, molecule, work_dir=None, queue=None): if self._tleap: if _tleap_exe is not None: output = self._run_tleap(molecule, str(work_dir)) - if not is_smiles: - new_mol._ion_water_model = self._water_model + new_mol._ion_water_model = self._water_model # Otherwise, try using pdb2gmx. elif self._pdb2gmx: if _gmx_exe is not None: @@ -371,41 +379,23 @@ def run(self, molecule, work_dir=None, queue=None): # Make the molecule 'mol' compatible with 'par_mol'. This will create # a mapping between atom indices in the two molecules and add all of # the new properties from 'par_mol' to 'mol'. - if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(molecule).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = edit_mol.residue(_SireMol.ResIdx(0)).rename(resname).molecule() - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() - + if self._ensure_compatible: + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, + ) else: - if self._ensure_compatible: + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = self._forcefield @@ -1009,9 +999,12 @@ def run(self, molecule, work_dir=None, queue=None): # Convert SMILES to a molecule. if isinstance(molecule, str): - is_smiles = True try: + smiles = molecule new_mol = _smiles(molecule) + edit_mol = new_mol._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + new_mol._sire_object = edit_mol.commit() except Exception as e: msg = "Unable to convert SMILES to Molecule using RDKit." if _isVerbose(): @@ -1020,7 +1013,6 @@ def run(self, molecule, work_dir=None, queue=None): else: raise _ThirdPartyError(msg) from None else: - is_smiles = False new_mol = molecule.copy() # Use the net molecular charge passed as an option. @@ -1251,45 +1243,23 @@ def run(self, molecule, work_dir=None, queue=None): # Make the molecule 'mol' compatible with 'par_mol'. This will create # a mapping between atom indices in the two molecules and add all of # the new properties from 'par_mol' to 'mol'. - if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(molecule).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = ( - edit_mol.residue(_SireMol.ResIdx(0)) - .rename(resname) - .molecule() + if self._ensure_compatible: + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, ) - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() - else: - if self._ensure_compatible: + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = ff diff --git a/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_openforcefield.py b/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_openforcefield.py index c94e7d349..2b61a0d2f 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_openforcefield.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_openforcefield.py @@ -116,6 +116,7 @@ from ... import _isVerbose from ... import IO as _IO +from ...Convert import smiles as _smiles from ..._Exceptions import IncompatibleError as _IncompatibleError from ..._Exceptions import ThirdPartyError as _ThirdPartyError from ..._SireWrappers import Molecule as _Molecule @@ -415,31 +416,31 @@ def run(self, molecule, work_dir=None, queue=None): # Make the parameterised molecule compatible with the original topology. if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - # Since the name is written to topology file formats, we - # need to ensure that it doesn't start with an [ character, - # which would break GROMACS. - name = molecule - if name.startswith("["): - name = f"smiles:{name}" - - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(name).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = edit_mol.residue(_SireMol.ResIdx(0)).rename(resname).molecule() - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() + # Try to create BioSimSpace molecule from the SMILES string. + try: + smiles = molecule + molecule = _smiles(molecule) + edit_mol = molecule._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + molecule._sire_object = edit_mol.commit() + except Exception as e: + msg = "Unable to convert SMILES to Molecule using RDKit." + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise _ThirdPartyError(msg) from e + else: + raise _ThirdPartyError(msg) from None + if self._ensure_compatible: + new_mol = molecule.copy() + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, + ) else: - if self._ensure_compatible: + try: new_mol = molecule.copy() new_mol.makeCompatibleWith( par_mol, @@ -447,16 +448,8 @@ def run(self, molecule, work_dir=None, queue=None): overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = self._forcefield diff --git a/tests/Parameters/test_parameters.py b/tests/Parameters/test_parameters.py index 8acd41ff7..e9c07cc13 100644 --- a/tests/Parameters/test_parameters.py +++ b/tests/Parameters/test_parameters.py @@ -137,3 +137,33 @@ def test_leap_commands(molecule0): assert line_pre[-1] < line_post[0] for x in range(len(line_post) - 1): assert line_post[x] < line_post[x + 1] + + +@pytest.mark.skipif( + has_antechamber is False or has_tleap is False, + reason="Requires AmberTools/antechamber and tLEaP to be installed.", +) +def test_smiles_stereo(): + """ + Test that SMILES string stereochemistry is correctly preserved when + parameterising a molecule. + """ + + from rdkit import Chem + + # Define the SMILES string. + smiles = "CC[C@@H](O)C" + + # Create the parameterised molecule. + mol = BSS.Parameters.gaff(smiles).getMolecule() + + # Convert to RDKit format. + rdmol0 = Chem.MolFromSmiles(smiles) + rdmol1 = BSS.Convert.toRDKit(mol) + + # Get the SMILES string. + rdmol0_smiles = Chem.MolToSmiles(rdmol0) + rdmol1_smiles = Chem.MolToSmiles(Chem.RemoveHs(rdmol1)) + + # Make sure the SMILES strings are the same. + assert rdmol0_smiles == rdmol1_smiles diff --git a/tests/Sandpit/Exscientia/Parameters/test_parameters.py b/tests/Sandpit/Exscientia/Parameters/test_parameters.py index f42f2d539..00bb088b2 100644 --- a/tests/Sandpit/Exscientia/Parameters/test_parameters.py +++ b/tests/Sandpit/Exscientia/Parameters/test_parameters.py @@ -142,3 +142,33 @@ def test_leap_commands(molecule0): assert line_pre[-1] < line_post[0] for x in range(len(line_post) - 1): assert line_post[x] < line_post[x + 1] + + +@pytest.mark.skipif( + has_antechamber is False or has_tleap is False, + reason="Requires AmberTools/antechamber and tLEaP to be installed.", +) +def test_smiles_stereo(): + """ + Test that SMILES string stereochemistry is correctly preserved when + parameterising a molecule. + """ + + from rdkit import Chem + + # Define the SMILES string. + smiles = "CC[C@@H](O)C" + + # Create the parameterised molecule. + mol = BSS.Parameters.gaff(smiles).getMolecule() + + # Convert to RDKit format. + rdmol0 = Chem.MolFromSmiles(smiles) + rdmol1 = BSS.Convert.toRDKit(mol) + + # Get the SMILES string. + rdmol0_smiles = Chem.MolToSmiles(rdmol0) + rdmol1_smiles = Chem.MolToSmiles(Chem.RemoveHs(rdmol1)) + + # Make sure the SMILES strings are the same. + assert rdmol0_smiles == rdmol1_smiles