Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: improve support for newer dssp versions #403 #404

Merged
merged 9 commits into from
Aug 4, 2024
Merged
9 changes: 5 additions & 4 deletions graphein/molecule/atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@

from typing import Dict, List

from loguru import logger

from graphein.utils.dependencies import import_message

try:
import rdkit.Chem as Chem
except ImportError:
import_message(
"graphein.molecule.atoms", "rdkit", "rdkit", True, extras=True
except (ImportError, ModuleNotFoundError):
logger.warning(
import_message(__name__, "rdkit", "rdkit", True, extras=True)
)


BASE_ATOMS: List[str] = [
"C",
"H",
Expand Down
4 changes: 4 additions & 0 deletions graphein/molecule/chembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
# Code Repository: https://github.com/a-r-j/graphein
from bioservices import ChEMBL

from graphein.utils.dependencies import requires_python_libs


@requires_python_libs("bioservices")
def get_smiles_from_chembl(chembl_id: str) -> str:
"""Retrieves a SMILE string from a ChEMBL ID.

Expand All @@ -27,6 +30,7 @@ def get_smiles_from_chembl(chembl_id: str) -> str:
return data["molecule_structures"]["canonical_smiles"]


@requires_python_libs("bioservices")
def get_chembl_id_from_smiles(smiles: str) -> str:
"""Retrieves a ChEMBL ID from a SMILE string.

Expand Down
9 changes: 7 additions & 2 deletions graphein/molecule/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from loguru import logger as log
from tqdm.contrib.concurrent import process_map, thread_map

from graphein.utils.dependencies import import_message
from graphein.utils.dependencies import import_message, requires_python_libs
from graphein.utils.utils import (
annotate_edge_metadata,
annotate_graph_metadata,
Expand All @@ -36,9 +36,11 @@
from rdkit import Chem
from rdkit.Chem import AllChem
except ImportError:
import_message("graphein.molecule.graphs", "rdkit", "rdkit", True)
msg = import_message("graphein.molecule.graphs", "rdkit", "rdkit", True)
log.warning(msg)


@requires_python_libs("rdkit")
def initialise_graph_with_metadata(
name: str,
rdmol: rdkit.Mol,
Expand All @@ -60,6 +62,7 @@ def initialise_graph_with_metadata(
)


@requires_python_libs("rdkit")
def add_nodes_to_graph(
G: nx.Graph,
verbose: bool = False,
Expand Down Expand Up @@ -92,6 +95,7 @@ def add_nodes_to_graph(
return G


@requires_python_libs("rdkit")
def generate_3d(
mol: Union[nx.Graph, Chem.Mol], recompute_graph: bool = False
) -> Union[nx.Graph, rdkit.Chem.rdchem.Mol]:
Expand Down Expand Up @@ -130,6 +134,7 @@ def generate_3d(
return rdmol


@requires_python_libs("rdkit")
def construct_graph(
config: Optional[MoleculeGraphConfig] = None,
mol: Optional[rdkit.Mol] = None,
Expand Down
22 changes: 21 additions & 1 deletion graphein/molecule/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import minimum_spanning_tree

from graphein.utils.dependencies import import_message
from graphein.utils.dependencies import import_message, requires_python_libs

try:
import rdkit
Expand Down Expand Up @@ -55,6 +55,7 @@
MAX_NCAND: int = 2000


@requires_python_libs("rdkit")
def get_center(
mol: Union[nx.Graph, Chem.Mol], weights: Optional[np.ndarray] = None
) -> np.ndarray:
Expand All @@ -76,6 +77,7 @@ def get_center(
return np.array(ComputeCentroid(mol.GetConformer(0), weights=weights))


@requires_python_libs("rdkit")
def get_shape_moments(mol: Union[nx.Graph, Chem.Mol]) -> Tuple[float, float]:
"""Calculate principal moments of inertia as defined in https://pubs.acs.org/doi/10.1021/ci025599w

Expand All @@ -94,6 +96,7 @@ def get_shape_moments(mol: Union[nx.Graph, Chem.Mol]) -> Tuple[float, float]:
return npr1, npr2


@requires_python_libs("rdkit")
def count_fragments(mol: Union[nx.Graph, Chem.Mol]) -> int:
"""Counts the number of the disconnected fragments in a molecule.

Expand All @@ -107,6 +110,7 @@ def count_fragments(mol: Union[nx.Graph, Chem.Mol]) -> int:
return len(Chem.GetMolFrags(mol, asMols=True))


@requires_python_libs("rdkit")
def get_max_ring_size(mol: Union[nx.Graph, Chem.Mol]) -> int:
"""
Get the size of the largest ring in a molecule.
Expand All @@ -124,6 +128,7 @@ def get_max_ring_size(mol: Union[nx.Graph, Chem.Mol]) -> int:
return 0 if len(atom_rings) == 0 else max(len(x) for x in ri.AtomRings())


@requires_python_libs("rdkit")
def label_rdmol_atoms(
mol: Union[nx.Graph, Chem.Mol], labels: List[Any]
) -> Union[nx.Graph, Chem.Mol]:
Expand All @@ -146,6 +151,7 @@ def label_rdmol_atoms(
return mol


@requires_python_libs("rdkit")
def tag_rdmol_atoms(
mol, atoms_to_tag, tag: str = "x"
) -> Union[nx.Graph, Chem.Mol]:
Expand All @@ -156,6 +162,7 @@ def tag_rdmol_atoms(
return mol


@requires_python_libs("rdkit")
def get_mol(smiles: str) -> rdkit.Chem.rdchem.Mol:
"""
Function for getting rdmol from smiles. Applies kekulization.
Expand All @@ -172,6 +179,7 @@ def get_mol(smiles: str) -> rdkit.Chem.rdchem.Mol:
return mol


@requires_python_libs("rdkit")
def get_smiles(mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol]) -> str:
"""
Function for getting smiles from rdmol. Applies kekulization.
Expand All @@ -186,6 +194,7 @@ def get_smiles(mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol]) -> str:
return Chem.MolToSmiles(mol, kekuleSmiles=True)


@requires_python_libs("rdkit")
def sanitize(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol:
"""
Function for sanitizing a rdmol
Expand All @@ -203,6 +212,7 @@ def sanitize(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol:
return mol


@requires_python_libs("rdkit")
def copy_edit_mol(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol:
"""
Function for copying a rdmol
Expand All @@ -224,6 +234,7 @@ def copy_edit_mol(mol: rdkit.Chem.rdchem.Mol) -> rdkit.Chem.rdchem.Mol:
return new_mol


@requires_python_libs("rdkit")
def get_clique_mol(mol: rdkit.Chem.rdchem.Atom, atoms: List[int]):
"""
Function for getting clique rdmol
Expand All @@ -242,6 +253,7 @@ def get_clique_mol(mol: rdkit.Chem.rdchem.Atom, atoms: List[int]):
return new_mol


@requires_python_libs("rdkit")
def copy_rdmol_atom(atom: rdkit.Chem.rdchem.Atom) -> rdkit.Chem.rdchem.Atom:
"""
Function for copying an atom
Expand All @@ -257,6 +269,7 @@ def copy_rdmol_atom(atom: rdkit.Chem.rdchem.Atom) -> rdkit.Chem.rdchem.Atom:
return new_atom


@requires_python_libs("rdkit")
def get_morgan_fp(
mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol],
radius: int = 2,
Expand All @@ -281,6 +294,7 @@ def get_morgan_fp(
)


@requires_python_libs("rdkit")
def get_morgan_fp_np(
mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol],
radius: int = 2,
Expand All @@ -307,18 +321,21 @@ def get_morgan_fp_np(
return arr


@requires_python_libs("rdkit")
def compute_fragments(mol: Union[nx.Graph, Chem.Mol]) -> List[Chem.Mol]:
    """Split a molecule into its disconnected fragments.

    :param mol: Molecule to fragment. If a graph is given, the molecule is
        read from ``mol.graph["rdmol"]``.
    :type mol: Union[nx.Graph, Chem.Mol]
    :return: One molecule per fragment, as returned by
        ``Chem.GetMolFrags(..., asMols=True)``.
    :rtype: List[Chem.Mol]
    """
    # Graph inputs carry their molecule in the graph-level metadata.
    rdmol = mol.graph["rdmol"] if isinstance(mol, nx.Graph) else mol
    fragments = Chem.GetMolFrags(rdmol, asMols=True)
    return [*fragments]


@requires_python_libs("rdkit")
def get_mol_weight(mol: Union[nx.Graph, Chem.Mol]) -> float:
    """Compute the average molecular weight of a molecule.

    The previous implementation was a placeholder that returned the molecule
    itself, contradicting the declared ``float`` return type.

    :param mol: Molecule to weigh. If a graph is given, the molecule is read
        from ``mol.graph["rdmol"]``.
    :type mol: Union[nx.Graph, Chem.Mol]
    :return: Average molecular weight as computed by
        ``rdkit.Chem.Descriptors.MolWt``.
    :rtype: float
    """
    # Imported locally so the module still imports when rdkit is missing;
    # the decorator guards the call itself.
    from rdkit.Chem import Descriptors

    if isinstance(mol, nx.Graph):
        mol = mol.graph["rdmol"]
    return Descriptors.MolWt(mol)


@requires_python_libs("rdkit")
def get_qed_score(
mol: Union[nx.Graph, rdkit.Chem.rdchem.Mol]
) -> Union[float, None]:
Expand Down Expand Up @@ -364,6 +381,7 @@ def simplify_smile(smile: str) -> str:
return "".join(stripped_smile)


@requires_python_libs("selfies")
def smile_to_selfies(smile: str) -> str:
"""Encodes a SMILES string into a Selfies string.

Expand All @@ -375,6 +393,7 @@ def smile_to_selfies(smile: str) -> str:
return sf.encoder(smile)


@requires_python_libs("selfies")
def selfies_to_smile(selfie: str) -> str:
"""Decodes a selfies string into a SMILES string.

Expand All @@ -386,6 +405,7 @@ def selfies_to_smile(selfie: str) -> str:
return sf.decoder(selfie)


@requires_python_libs("rdkit")
def tree_decomp(mol: rdkit.Chem.rdchem.Mol) -> Tuple[List]:
"""
Function for decomposing rdmol to a tree
Expand Down
40 changes: 26 additions & 14 deletions graphein/protein/features/nodes/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
import networkx as nx
import pandas as pd
from Bio.PDB.DSSP import dssp_dict_from_pdb_file, residue_max_acc
from loguru import logger

from graphein.protein.resi_atoms import STANDARD_AMINO_ACID_MAPPING_1_TO_3
from graphein.protein.utils import save_pdb_df_to_pdb
from graphein.utils.dependencies import is_tool
from graphein.utils.dependencies import is_tool, requires_external_dependencies

DSSP_COLS = [
"chain",
Expand Down Expand Up @@ -70,6 +71,7 @@ def parse_dssp_df(dssp: Dict[str, Any]) -> pd.DataFrame:
return pd.DataFrame.from_records(appender, columns=DSSP_COLS)


@requires_external_dependencies("mkdssp")
def add_dssp_df(
G: nx.Graph,
dssp_config: Optional[DSSPConfig],
Expand All @@ -79,12 +81,13 @@ def add_dssp_df(

:param G: Input protein graph
:type G: nx.Graph
:param dssp_config: DSSPConfig object. Specifies which executable to run. Located in graphein.protein.config
:param dssp_config: DSSPConfig object. Specifies which executable to run.
Located in :obj:`graphein.protein.config`.
:type dssp_config: DSSPConfig, optional
:return: Protein graph with DSSP dataframe added
:rtype: nx.Graph
"""

# if dssp_config is None:
config = G.graph["config"]
pdb_code = G.graph["pdb_code"]
path = G.graph["path"]
Expand All @@ -107,14 +110,14 @@ def add_dssp_df(
if os.path.isfile(config.pdb_dir / (pdb_code + ".pdb")):
pdb_file = config.pdb_dir / (pdb_code + ".pdb")

# get dssp version string
dssp_version = re.search(
r"version ([\d\.]+)", os.popen(f"{executable} --version").read()
).group(
1
) # e.g. "4.0.4"
# Check for existence of pdb file. If not, reconstructs it from the raw df.
if pdb_file:
# get dssp version string
dssp_version = re.search(
r"version ([\d\.]+)", os.popen(f"{executable} --version").read()
).group(
1
) # e.g. "4.0.4"
dssp_dict = dssp_dict_from_pdb_file(
pdb_file, DSSP=executable, dssp_version=dssp_version
)
Expand All @@ -124,17 +127,26 @@ def add_dssp_df(
G.graph["raw_pdb_df"], tmpdirname + f"/{pdb_name}.pdb"
)
dssp_dict = dssp_dict_from_pdb_file(
tmpdirname + f"/{pdb_name}.pdb", DSSP=executable
tmpdirname + f"/{pdb_name}.pdb",
DSSP=executable,
dssp_version=dssp_version,
)

if len(dssp_dict[0]) == 0:
raise ValueError(
"DSSP could not be calculated. Check DSSP version "
f"({dssp_version}) or that the input PDB file is valid."
)

if config.verbose:
print(f"Using DSSP executable '{executable}'")
logger.debug(f"Using DSSP executable '{executable}'")

dssp_dict = parse_dssp_df(dssp_dict)
# Convert 1 letter aa code to 3 letter
dssp_dict["aa"] = dssp_dict["aa"].map(STANDARD_AMINO_ACID_MAPPING_1_TO_3)

# Resolve UNKs NOTE: the original didn't work if HETATM residues exist in DSSP output
# Resolve UNKs
# NOTE: the original didn't work if HETATM residues exist in DSSP output
_raw_pdb_df = G.graph["raw_pdb_df"].copy().drop_duplicates("node_id")
_dssp_df_unk = dssp_dict.loc[dssp_dict["aa"] == "UNK"][
["chain", "resnum", "icode"]
Expand Down Expand Up @@ -177,7 +189,7 @@ def add_dssp_df(
dssp_dict.set_index("node_id", inplace=True)

if config.verbose:
print(dssp_dict)
logger.debug(dssp_dict)

# Assign DSSP Dict
G.graph["dssp_df"] = dssp_dict
Expand Down Expand Up @@ -241,7 +253,7 @@ def add_dssp_feature(G: nx.Graph, feature: str) -> nx.Graph:
nx.set_node_attributes(G, dict(dssp_df[feature]), feature)

if config.verbose:
print("Added " + feature + " features to graph nodes")
logger.debug("Added " + feature + " features to graph nodes")

return G

Expand Down
Loading
Loading