From 09e0e44fde911453808ef33b4b46df6c26563936 Mon Sep 17 00:00:00 2001 From: Benzoin96485 Date: Wed, 14 Aug 2024 17:17:30 -0400 Subject: [PATCH 1/2] cap the residues with neighbors deleted by protoss --- qp/cluster/coordination_spheres.py | 60 +++++++++++++++++++++++++----- qp/structure/add_hydrogens.py | 2 +- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/qp/cluster/coordination_spheres.py b/qp/cluster/coordination_spheres.py index 7bcece3..032ce76 100644 --- a/qp/cluster/coordination_spheres.py +++ b/qp/cluster/coordination_spheres.py @@ -21,10 +21,13 @@ """ import os +from typing import Set, Literal, Optional import numpy as np from Bio.PDB import PDBParser, Polypeptide, PDBIO, Select from Bio.PDB.Atom import Atom from Bio.PDB.Residue import Residue +from Bio.PDB.Chain import Chain +from Bio.PDB.Model import Model from Bio.PDB.NeighborSearch import NeighborSearch from scipy.spatial import Voronoi from qp.structure import struct_to_file @@ -482,7 +485,12 @@ def scale_hydrogen(a, b, scale): return scale * (q - p) + p -def build_hydrogen(chain, parent, template, atom): +def get_normalized_vector(atom1: Atom, atom2: Atom) -> np.array: + v = atom2.get_coord() - atom1.get_coord() + return v / np.linalg.norm(v) + + +def build_hydrogen(parent: Residue, template: Optional[Residue], atom: Literal["N", "C"]): """ Cap with hydrogen, building based on the upstream or downstream residue @@ -502,10 +510,25 @@ def build_hydrogen(chain, parent, template, atom): res: Bio.PDB.Residue Residue containing added hydrogen """ - if atom == "N": - pos = scale_hydrogen(parent["N"], template["C"], 1 / 1.32) + if template is not None: + if atom == "N": + pos = scale_hydrogen(parent["N"], template["C"], 1 / 1.32) + else: + pos = scale_hydrogen(parent["C"], template["N"], 1.09 / 1.32) else: - pos = scale_hydrogen(parent["C"], template["N"], 1.09 / 1.32) + CA = parent["CA"] + if atom == "N": + N = parent["N"] + H = parent["H"] + bis = get_normalized_vector(N, CA) + get_normalized_vector(N, H) + bis /= np.linalg.norm(bis) + pos = N.get_coord() - bis + else: + C = parent["C"] + O = parent["O"] + bis = get_normalized_vector(C, CA) + get_normalized_vector(C, O) + bis /= np.linalg.norm(bis) + pos = C.get_coord() - bis * 1.09 for name in ["H1", "H2", "H3"]: if name not in parent: @@ -572,7 +595,20 @@ def build_heavy(chain, parent, template, atom): return res -def cap_chains(model, residues, capping): +def check_atom_valence(res: Residue, tree: NeighborSearch, atom: Literal["N", "C"], cn: int) -> bool: + neighbors = tree.search(res[atom].get_coord(), radius=1.8) + if len(neighbors) > cn: + return True + else: + for neighbor in neighbors: + if neighbor.get_name() == "C" and atom == "N": + return True + elif neighbor.get_name() == "N" and atom == "C": + return True + return False + + +def cap_chains(model: Model, residues: Set[Residue], capping: int) -> Set[Residue]: """ Cap chain breaks for a set of extracted residues @@ -602,6 +638,7 @@ def cap_chains(model, residues, capping): res_id = res.get_full_id() chain = model[res_id[2]] + chain_tree = NeighborSearch(list(chain.get_atoms())) chain_list = orig_chains[chain.get_id()] ind = chain_list.index(res) @@ -614,9 +651,12 @@ def cap_chains(model, residues, capping): and Polypeptide.is_aa(pre) ): # ignores hetero residues if capping == 1: - cap_residues.add(build_hydrogen(chain, res, pre, "N")) + cap_residues.add(build_hydrogen(res, pre, "N")) else: - cap_residues.add(build_heavy(chain, res, pre, "N")) + cap_residues.add(build_heavy(res, pre, "N")) + elif not check_atom_valence(res, chain_tree, "N", 3): + print(chain, nxt.get_id(), res_id) + cap_residues.add(build_hydrogen(res, None, "N")) if ind < len(chain_list) - 1: nxt = chain_list[ind + 1] @@ -627,9 +667,11 @@ def cap_chains(model, residues, capping): and Polypeptide.is_aa(nxt) ): if capping == 1: - cap_residues.add(build_hydrogen(chain, res, nxt, "C")) + cap_residues.add(build_hydrogen(res, nxt, "C")) else: - cap_residues.add(build_heavy(chain, res, nxt, "C")) + cap_residues.add(build_heavy(res, nxt, "C")) + elif not check_atom_valence(res, chain_tree, "N", 3): + cap_residues.add(build_hydrogen(res, None, "C")) return cap_residues diff --git a/qp/structure/add_hydrogens.py b/qp/structure/add_hydrogens.py index 8d25658..353a635 100644 --- a/qp/structure/add_hydrogens.py +++ b/qp/structure/add_hydrogens.py @@ -532,7 +532,7 @@ def compute_charge(path_ligand, path_pdb): if line.startswith("M RGP"): # R# atom is found in the addition between CSO and TAN # It's not a real atom and recorded in the RGP entry - n_atom -= sum([int(x) for x in line.split()[4::2]]) + n_atom -= int(line.split()[2]) if line.startswith("M CHG"): c += sum([int(x) for x in line.split()[4::2]]) break From 2f0dfb8de023ab8d58722778718aa417cac32c5c Mon Sep 17 00:00:00 2001 From: Benzoin96485 Date: Thu, 15 Aug 2024 15:35:53 -0400 Subject: [PATCH 2/2] delete abundant print info --- qp/cluster/coordination_spheres.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qp/cluster/coordination_spheres.py b/qp/cluster/coordination_spheres.py index 032ce76..ce9efde 100644 --- a/qp/cluster/coordination_spheres.py +++ b/qp/cluster/coordination_spheres.py @@ -655,7 +655,6 @@ def cap_chains(model: Model, residues: Set[Residue], capping: int) -> Set[Residu else: cap_residues.add(build_heavy(res, pre, "N")) elif not check_atom_valence(res, chain_tree, "N", 3): - print(chain, nxt.get_id(), res_id) cap_residues.add(build_hydrogen(res, None, "N")) if ind < len(chain_list) - 1: