From 22ecbd2be6f05c36f6eb517853cd673edb5adb57 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Tue, 23 Apr 2024 14:49:08 +0100 Subject: [PATCH 1/4] [cleanup] change logging levels to debug to declutter output --- graphein/protein/tensor/io.py | 2 +- graphein/protein/utils.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index 779aa706..891bfcac 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -49,7 +49,7 @@ conda_channel="pyg", pip_install=True, ) - log.warning(message) + log.debug(message) try: import torch diff --git a/graphein/protein/utils.py b/graphein/protein/utils.py index a27fdf05..fba9075c 100644 --- a/graphein/protein/utils.py +++ b/graphein/protein/utils.py @@ -205,7 +205,7 @@ def download_pdb( # Check if PDB already exists if os.path.exists(out_dir / f"{pdb_code}{extension}") and not overwrite: - log.info( + log.debug( f"{pdb_code} already exists: {out_dir / f'{pdb_code}{extension}'}" ) return out_dir / f"{pdb_code}{extension}" @@ -224,7 +224,7 @@ def download_pdb( assert os.path.exists( out_dir / f"{pdb_code}{extension}" ), f"{pdb_code} download failed. 
Not found in {out_dir}" - log.info(f"{pdb_code} downloaded to {out_dir}") + log.debug(f"{pdb_code} downloaded to {out_dir}") return out_dir / f"{pdb_code}{extension}" @@ -346,7 +346,7 @@ def download_alphafold_structure( (Path(out_dir) / f"{uniprot_id}{extension}").resolve() ) - log.info(f"Downloaded AlphaFold PDB file for: {uniprot_id}") + log.debug(f"Downloaded AlphaFold PDB file for: {uniprot_id}") if aligned_score: score_query = ( BASE_URL @@ -412,7 +412,7 @@ def save_graph_to_pdb( if hetatms: ppd.df["HETATM"] = hetatm_df ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True) - log.info(f"Successfully saved graph to {path}") + log.debug(f"Successfully saved graph to {path}") def save_pdb_df_to_pdb( @@ -439,7 +439,7 @@ def save_pdb_df_to_pdb( if hetatms: ppd.df["HETATM"] = hetatm_df ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True) - log.info(f"Successfully saved PDB dataframe to {path}") + log.debug(f"Successfully saved PDB dataframe to {path}") def save_rgroup_df_to_pdb( @@ -475,7 +475,7 @@ def save_rgroup_df_to_pdb( if hetatms: ppd.df["HETATM"] = hetatm_df ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True) - log.info(f"Successfully saved rgroup data to {path}") + log.debug(f"Successfully saved rgroup data to {path}") def esmfold( From a0ae40b7c9fef09f0f23837b9abd4659dd6154cf Mon Sep 17 00:00:00 2001 From: kierandidi Date: Tue, 23 Apr 2024 14:54:38 +0100 Subject: [PATCH 2/4] added to CHANGELOG --- CHANGELOG.md | 1 + graphein/protein/tensor/io.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aea5a35..b41d3df0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Fix bug where the `deprotonate` argument is not wired up to `graphein.protein.graphs.construct_graphs`. 
[#375](https://github.com/a-r-j/graphein/pull/375) #### Misc +* bumped logging level down from `INFO` to `DEBUG` at several places to reduce output length [#391](https://github.com/a-r-j/graphein/pull/391) * exposed `fill_value` and `bfactor` option to `protein_to_pyg` function. [#385](https://github.com/a-r-j/graphein/pull/385) and [#388](https://github.com/a-r-j/graphein/pull/388) * Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas. [#382](https://github.com/a-r-j/graphein/pull/382) * Resolve issue with notebook version and `pluggy` in Dockerfile. [#372](https://github.com/a-r-j/graphein/pull/372) diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index 891bfcac..dc7698bd 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -60,7 +60,7 @@ conda_channel="pytorch", pip_install=True, ) - log.warning(message) + log.debug(message) def get_protein_length(df: pd.DataFrame, insertions: bool = True) -> int: From 4a3b725ff5dd01bc7f74b9cdacd2991a62e71cb1 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Tue, 23 Apr 2024 14:58:59 +0100 Subject: [PATCH 3/4] [cleanup] change logging levels to debug to declutter output --- graphein/protein/features/sequence/embeddings.py | 4 ++-- graphein/protein/meshes.py | 2 +- graphein/protein/tensor/representation.py | 2 +- graphein/protein/tensor/sequence.py | 2 +- graphein/protein/visualisation.py | 3 ++- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/graphein/protein/features/sequence/embeddings.py b/graphein/protein/features/sequence/embeddings.py index 914bcf85..31f6d5f8 100644 --- a/graphein/protein/features/sequence/embeddings.py +++ b/graphein/protein/features/sequence/embeddings.py @@ -31,7 +31,7 @@ pip_install=True, conda_channel="pytorch", ) - log.warning(message) + log.debug(message) try: import biovec @@ -42,7 +42,7 @@ pip_install=True, extras=True, ) - log.warning(message) + log.debug(message) @lru_cache() diff 
--git a/graphein/protein/meshes.py b/graphein/protein/meshes.py index a37d2b0f..1f8fc7c4 100644 --- a/graphein/protein/meshes.py +++ b/graphein/protein/meshes.py @@ -27,7 +27,7 @@ conda_channel="pytorch3d", pip_install=True, ) - log.warning(message) + log.debug(message) def check_for_pymol_installation(): diff --git a/graphein/protein/tensor/representation.py b/graphein/protein/tensor/representation.py index 5bbf1426..59b4e873 100644 --- a/graphein/protein/tensor/representation.py +++ b/graphein/protein/tensor/representation.py @@ -24,7 +24,7 @@ conda_channel="pytorch", pip_install=True, ) - log.warning(message) + log.debug(message) def get_full_atom_coords( diff --git a/graphein/protein/tensor/sequence.py b/graphein/protein/tensor/sequence.py index 5f53c0e7..8231a04e 100644 --- a/graphein/protein/tensor/sequence.py +++ b/graphein/protein/tensor/sequence.py @@ -31,7 +31,7 @@ conda_channel="pytorch", pip_install=True, ) - log.warning(message) + log.debug(message) def get_sequence( diff --git a/graphein/protein/visualisation.py b/graphein/protein/visualisation.py index d02bd8dd..c87bcbac 100644 --- a/graphein/protein/visualisation.py +++ b/graphein/protein/visualisation.py @@ -33,7 +33,7 @@ package="pytorch3d", conda_channel="pytorch3d", ) - log.warning(message) + log.debug(message) try: from mpl_chord_diagram import chord_diagram @@ -44,6 +44,7 @@ pip_install=True, extras=True, ) + log.debug(message) def plot_pointcloud(mesh: Meshes, title: str = "") -> Axes3D: From 67c512d8003eb52421e0b4bd23e46f6f440c2d48 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Tue, 23 Apr 2024 15:22:33 +0100 Subject: [PATCH 4/4] [cleanup] change logging levels to debug to declutter output for pdb_data utils --- graphein/ml/datasets/pdb_data.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/graphein/ml/datasets/pdb_data.py b/graphein/ml/datasets/pdb_data.py index 8b521f23..febd3cea 100644 --- a/graphein/ml/datasets/pdb_data.py +++ 
b/graphein/ml/datasets/pdb_data.py @@ -341,7 +341,7 @@ def _download_pdb_sequences(self): ): log.info("Downloading PDB sequences...") wget.download(self.pdb_sequences_url, out=str(self.root_dir)) - log.info("Downloaded sequences") + log.debug("Downloaded sequences") # Unzip all collected sequences if not os.path.exists(self.root_dir / self.pdb_seqres_filename): @@ -353,7 +353,7 @@ def _download_pdb_sequences(self): self.root_dir / self.pdb_seqres_filename, "wb" ) as f_out: shutil.copyfileobj(f_in, f_out) - log.info("Unzipped sequences") + log.debug("Unzipped sequences") def _download_ligand_map(self): """Download ligand map from @@ -362,7 +362,7 @@ def _download_ligand_map(self): if not os.path.exists(self.root_dir / self.ligand_map_filename): log.info("Downloading ligand map...") wget.download(self.ligand_map_url, out=str(self.root_dir)) - log.info("Downloaded ligand map") + log.debug("Downloaded ligand map") def _download_source_map(self): """Download source map from @@ -371,7 +371,7 @@ def _download_source_map(self): if not os.path.exists(self.root_dir / self.source_map_filename): log.info("Downloading source map...") wget.download(self.source_map_url, out=str(self.root_dir)) - log.info("Downloaded source map") + log.debug("Downloaded source map") def _download_resolution(self): """Download source map from @@ -380,7 +380,7 @@ def _download_resolution(self): if not os.path.exists(self.root_dir / self.resolution_filename): log.info("Downloading resolution map...") wget.download(self.resolution_url, out=str(self.root_dir)) - log.info("Downloaded resolution map") + log.debug("Downloaded resolution map") def _download_entry_metadata(self): """Download PDB entry metadata from @@ -391,7 +391,7 @@ def _download_entry_metadata(self): ): log.info("Downloading entry metadata...") wget.download(self.pdb_deposition_date_url, out=str(self.root_dir)) - log.info("Downloaded entry metadata") + log.debug("Downloaded entry metadata") def _download_exp_type(self): """Download PDB 
experiment metadata from @@ -400,7 +400,7 @@ def _download_exp_type(self): if not os.path.exists(self.root_dir / self.pdb_entry_type_filename): log.info("Downloading experiment type map...") wget.download(self.pdb_entry_type_url, out=str(self.root_dir)) - log.info("Downloaded experiment type map") + log.debug("Downloaded experiment type map") def _download_pdb_availability(self): """Download PDB availability metadata from @@ -409,7 +409,7 @@ def _download_pdb_availability(self): if not os.path.exists(self.root_dir / self.pdb_availability_filename): log.info("Downloading PDB availability map...") wget.download(self.pdb_availability_url, out=str(self.root_dir)) - log.info("Downloaded PDB availability map") + log.debug("Downloaded PDB availability map") def _parse_ligand_map(self) -> Dict[str, List[str]]: """Parse the ligand maps for all PDB records. @@ -1300,7 +1300,7 @@ def split_clusters( self.split_ratios, self.assign_leftover_rows_to_split_n, ) - log.info("Done splitting clusters") + log.debug("Done splitting clusters") # Update splits for split in self.splits: