Merge pull request #67 from davidkastner/modeller-protoss-loop

Updated Modeller Protoss loop to fix clashes
davidkastner · Aug 15, 2024 · f0296e7 · f0296e7
2 parents 4707ca3 + a290e84
commit f0296e7
Show file tree

Hide file tree

Showing 14 changed files with 412 additions and 959 deletions.
diff --git a/config.yaml b/config.yaml
@@ -7,13 +7,14 @@ coordination: true                     # Generate clusters
 skip: all                              # Skip 'modeller', 'protoss', or 'all' jobs if they already exist
 optimize_select_residues: 1            # Modeller to model no (0), missing (1), or all (2) residues
 convert_to_oxo: true                   # Used to convert A-KG NHIEs to Succinate-Oxo NHIEs, usually False
+max_clash_refinement_iter: 5           # Maximum number of iterations of the Modeller-Protoss clash-refinement loop
 
 # Cluster model parameters
 max_atom_count: 750            
 center_residues: [FE, FE2]             # Residue name to use as the cluster center
+additional_ligands: []                 # Residues beyond first sphere to include
 number_of_spheres: 3                   # Coordination spheres to generate
 radius_of_first_sphere: 4.0            # Distance based cut off to determine first sphere
-additional_ligands: []                 # Residues beyond first sphere to include
 include_ligands: 2                     # Only ligands and waters in first sphere (0), Only non-water ligands (1), Everything (2)
 capping_method: 1                      # Cap residues with None (0), hydrogens (1), ACE/NME (2)
 smoothing_method: 2                    # Smoothing method Box plot (0), DBSCAN (1), [Dummy Atom] (2), None (3)

diff --git a/qp/checks/charge_count.py b/qp/checks/charge_count.py
@@ -35,4 +35,4 @@ def check_charge(cluster_path):
     xyzfile = glob(os.path.join(cluster_path, "*.xyz"))[0]
     num_electron = count_electron(xyzfile)
     if (num_electron - charge) % 2 == spinmult % 2:
-        print(f"charge {charge}, spin multiplicity {spinmult}, electron number {num_electron} in {cluster_path} are not consistent!")
+        print(f"> ERROR: charge {charge}, spin multiplicity {spinmult}, electron number {num_electron} in {os.path.basename(cluster_path)} are not consistent!")
diff --git a/qp/checks/fetch_pdb.py b/qp/checks/fetch_pdb.py
@@ -18,14 +18,14 @@ def fetch_pdb(pdb, out):
     url = f"https://files.rcsb.org/view/{pdb}.pdb"
     r = requests.get(url)
     if r.status_code != 200:
-        raise ValueError("Error fetching PDB file")
+        raise ValueError("> ERROR: Could not fetch PDB file")
 
     os.makedirs(os.path.dirname(os.path.abspath(out)), exist_ok=True)
     with open(out, "w") as f:
         f.write(r.text)
 
 
-def parse_input(input_path, output_path):
+def get_pdbs(input_path, output_path):
     """
     Parses the input PDBs and returns a list of tuples
 
@@ -69,3 +69,33 @@ def parse_input(input_path, output_path):
     return pdb_all
 
 
+def get_centers(input_path):
+    """
+    Parses the input centers and returns a list of the center residue PDB IDs.
+
+    Parameters
+    ----------
+    input_path: list
+        List of pdbs or the input csv file.
+
+    Returns
+    -------
+    centers: list
+        List of center entries: the 'center' column value for each CSV row, or a (name, path) tuple for each .pdb file.
+    """
+
+    centers = []
+    for pdb_id in input_path:
+        if os.path.isfile(pdb_id):
+            pdb, ext = os.path.splitext(os.path.basename(pdb_id))
+            pdb = pdb.replace(".", "_")
+            if ext == ".pdb":
+                centers.append((pdb, pdb_id))
+            elif ext == ".csv":
+                input_csv = pdb_id
+                with open(input_csv, "r") as csvfile:
+                    reader = csv.DictReader(csvfile)
+                    for row in reader:
+                        center = row['center']
+                        centers.append(center)
+    return centers
diff --git a/qp/checks/protoss_atom_renaming.py b/qp/checks/protoss_atom_renaming.py
@@ -0,0 +1,74 @@
+'''Checks if protoss changed HETATM or ATOM classifications.'''
+
+from Bio.PDB import PDBParser
+
+def protoss_atom_renaming(modeller_pdb_path, protoss_pdb_path):
+    '''
+    Checks if protoss changed HETATM or ATOM classifications.
+
+    Parameters
+    ----------
+    modeller_pdb_path: string
+        The path to the original modeller PDB.
+    protoss_pdb_path: string
+        The path to the PDB processed by protoss.
+
+    Returns
+    -------
+    changed_residues: list
+        List of residues that have been changed from ATOM to HETATM or vice versa.
+
+    Note
+    ----
+    changed_residues = [
+    ('A', 'HIS', 123, 'ATOM', 'HETATM'),
+    ('B', 'AKG', 45, 'HETATM', 'ATOM'),
+    ('C', 'FE2', 67, 'HETATM', 'ATOM')]
+    '''
+
+    # Initialize the parser
+    parser = PDBParser(QUIET=True)
+
+    # Parse the PDB files
+    modeller_structure = parser.get_structure('modeller', modeller_pdb_path)
+    protoss_structure = parser.get_structure('protoss', protoss_pdb_path)
+
+    changed_residues = []
+
+    # Iterate through all chains and residues in the modeller structure
+    for modeller_chain in modeller_structure.get_chains():
+        chain_id = modeller_chain.get_id()
+
+        if chain_id not in protoss_structure[0]:
+            print(f"> WARNING: Chain {chain_id} not found in protoss structure.")
+            continue
+
+        protoss_chain = protoss_structure[0][chain_id]  # Assume the chains are the same
+
+        for modeller_residue in modeller_chain.get_residues():
+            res_num = modeller_residue.get_id()[1]
+            resname = modeller_residue.get_resname()
+
+            # Find the corresponding residue in the protoss chain by residue number and name
+            protoss_residue = None
+            for res in protoss_chain.get_residues():
+                if res.get_id()[1] == res_num and res.get_resname() == resname:
+                    protoss_residue = res
+                    break
+
+            if protoss_residue is None:
+                print(f"> WARNING: Residue {modeller_residue.get_id()} in chain {chain_id} not found in protoss structure.")
+                continue
+
+            # Compare the residue types (HETATM vs ATOM); waters carry Biopython's 'W' flag, not 'H_', so they count as ATOM
+            modeller_type = 'HETATM' if modeller_residue.id[0].startswith('H_') else 'ATOM'
+            protoss_type = 'HETATM' if protoss_residue.id[0].startswith('H_') else 'ATOM'
+
+            if modeller_type != protoss_type:
+                changed_residues.append((chain_id, resname, res_num, modeller_type, protoss_type))
+
+    # Alert the user if any residues have changed
+    for chain_id, resname, res_num, old_type, new_type in changed_residues:
+        print(f"> WARNING: {resname} {res_num} in chain {chain_id} was changed from {old_type} to {new_type}.")
+
+    return changed_residues