From 22ecbd2be6f05c36f6eb517853cd673edb5adb57 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Tue, 23 Apr 2024 14:49:08 +0100 Subject: [PATCH 1/4] [cleanup] change logging levels to debug to declutter output --- graphein/protein/tensor/io.py | 2 +- graphein/protein/utils.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index 779aa706..891bfcac 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -49,7 +49,7 @@ conda_channel="pyg", pip_install=True, ) - log.warning(message) + log.debug(message) try: import torch diff --git a/graphein/protein/utils.py b/graphein/protein/utils.py index a27fdf05..fba9075c 100644 --- a/graphein/protein/utils.py +++ b/graphein/protein/utils.py @@ -205,7 +205,7 @@ def download_pdb( # Check if PDB already exists if os.path.exists(out_dir / f"{pdb_code}{extension}") and not overwrite: - log.info( + log.debug( f"{pdb_code} already exists: {out_dir / f'{pdb_code}{extension}'}" ) return out_dir / f"{pdb_code}{extension}" @@ -224,7 +224,7 @@ def download_pdb( assert os.path.exists( out_dir / f"{pdb_code}{extension}" ), f"{pdb_code} download failed. 
Not found in {out_dir}" - log.info(f"{pdb_code} downloaded to {out_dir}") + log.debug(f"{pdb_code} downloaded to {out_dir}") return out_dir / f"{pdb_code}{extension}" @@ -346,7 +346,7 @@ def download_alphafold_structure( (Path(out_dir) / f"{uniprot_id}{extension}").resolve() ) - log.info(f"Downloaded AlphaFold PDB file for: {uniprot_id}") + log.debug(f"Downloaded AlphaFold PDB file for: {uniprot_id}") if aligned_score: score_query = ( BASE_URL @@ -412,7 +412,7 @@ def save_graph_to_pdb( if hetatms: ppd.df["HETATM"] = hetatm_df ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True) - log.info(f"Successfully saved graph to {path}") + log.debug(f"Successfully saved graph to {path}") def save_pdb_df_to_pdb( @@ -439,7 +439,7 @@ def save_pdb_df_to_pdb( if hetatms: ppd.df["HETATM"] = hetatm_df ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True) - log.info(f"Successfully saved PDB dataframe to {path}") + log.debug(f"Successfully saved PDB dataframe to {path}") def save_rgroup_df_to_pdb( @@ -475,7 +475,7 @@ def save_rgroup_df_to_pdb( if hetatms: ppd.df["HETATM"] = hetatm_df ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True) - log.info(f"Successfully saved rgroup data to {path}") + log.debug(f"Successfully saved rgroup data to {path}") def esmfold( From a0ae40b7c9fef09f0f23837b9abd4659dd6154cf Mon Sep 17 00:00:00 2001 From: kierandidi Date: Tue, 23 Apr 2024 14:54:38 +0100 Subject: [PATCH 2/4] added to CHANGELOG --- CHANGELOG.md | 1 + graphein/protein/tensor/io.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aea5a35..b41d3df0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Fix bug where the `deprotonate` argument is not wired up to `graphein.protein.graphs.construct_graphs`. 
[#375](https://github.com/a-r-j/graphein/pull/375) #### Misc +* bumped logging level down from `INFO` to `DEBUG` at several places to reduce output length [#391](https://github.com/a-r-j/graphein/pull/391) * exposed `fill_value` and `bfactor` option to `protein_to_pyg` function. [#385](https://github.com/a-r-j/graphein/pull/385) and [#388](https://github.com/a-r-j/graphein/pull/388) * Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas. [#382](https://github.com/a-r-j/graphein/pull/382) * Resolve issue with notebook version and `pluggy` in Dockerfile. [#372](https://github.com/a-r-j/graphein/pull/372) diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index 891bfcac..dc7698bd 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -60,7 +60,7 @@ conda_channel="pytorch", pip_install=True, ) - log.warning(message) + log.debug(message) def get_protein_length(df: pd.DataFrame, insertions: bool = True) -> int: From 4a3b725ff5dd01bc7f74b9cdacd2991a62e71cb1 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Tue, 23 Apr 2024 14:58:59 +0100 Subject: [PATCH 3/4] [cleanup] change logging levels to debug to declutter output --- graphein/protein/features/sequence/embeddings.py | 4 ++-- graphein/protein/meshes.py | 2 +- graphein/protein/tensor/representation.py | 2 +- graphein/protein/tensor/sequence.py | 2 +- graphein/protein/visualisation.py | 3 ++- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/graphein/protein/features/sequence/embeddings.py b/graphein/protein/features/sequence/embeddings.py index 914bcf85..31f6d5f8 100644 --- a/graphein/protein/features/sequence/embeddings.py +++ b/graphein/protein/features/sequence/embeddings.py @@ -31,7 +31,7 @@ pip_install=True, conda_channel="pytorch", ) - log.warning(message) + log.debug(message) try: import biovec @@ -42,7 +42,7 @@ pip_install=True, extras=True, ) - log.warning(message) + log.debug(message) @lru_cache() diff 
--git a/graphein/protein/meshes.py b/graphein/protein/meshes.py index a37d2b0f..1f8fc7c4 100644 --- a/graphein/protein/meshes.py +++ b/graphein/protein/meshes.py @@ -27,7 +27,7 @@ conda_channel="pytorch3d", pip_install=True, ) - log.warning(message) + log.debug(message) def check_for_pymol_installation(): diff --git a/graphein/protein/tensor/representation.py b/graphein/protein/tensor/representation.py index 5bbf1426..59b4e873 100644 --- a/graphein/protein/tensor/representation.py +++ b/graphein/protein/tensor/representation.py @@ -24,7 +24,7 @@ conda_channel="pytorch", pip_install=True, ) - log.warning(message) + log.debug(message) def get_full_atom_coords( diff --git a/graphein/protein/tensor/sequence.py b/graphein/protein/tensor/sequence.py index 5f53c0e7..8231a04e 100644 --- a/graphein/protein/tensor/sequence.py +++ b/graphein/protein/tensor/sequence.py @@ -31,7 +31,7 @@ conda_channel="pytorch", pip_install=True, ) - log.warning(message) + log.debug(message) def get_sequence( diff --git a/graphein/protein/visualisation.py b/graphein/protein/visualisation.py index d02bd8dd..c87bcbac 100644 --- a/graphein/protein/visualisation.py +++ b/graphein/protein/visualisation.py @@ -33,7 +33,7 @@ package="pytorch3d", conda_channel="pytorch3d", ) - log.warning(message) + log.debug(message) try: from mpl_chord_diagram import chord_diagram @@ -44,6 +44,7 @@ pip_install=True, extras=True, ) + log.debug(message) def plot_pointcloud(mesh: Meshes, title: str = "") -> Axes3D: From 67c512d8003eb52421e0b4bd23e46f6f440c2d48 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Tue, 23 Apr 2024 15:22:33 +0100 Subject: [PATCH 4/4] [cleanup] change logging levels to debug to declutter output for pdb_data utils --- graphein/ml/datasets/pdb_data.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/graphein/ml/datasets/pdb_data.py b/graphein/ml/datasets/pdb_data.py index 8b521f23..febd3cea 100644 --- a/graphein/ml/datasets/pdb_data.py +++ 
b/graphein/ml/datasets/pdb_data.py @@ -341,7 +341,7 @@ def _download_pdb_sequences(self): ): log.info("Downloading PDB sequences...") wget.download(self.pdb_sequences_url, out=str(self.root_dir)) - log.info("Downloaded sequences") + log.debug("Downloaded sequences") # Unzip all collected sequences if not os.path.exists(self.root_dir / self.pdb_seqres_filename): @@ -353,7 +353,7 @@ def _download_pdb_sequences(self): self.root_dir / self.pdb_seqres_filename, "wb" ) as f_out: shutil.copyfileobj(f_in, f_out) - log.info("Unzipped sequences") + log.debug("Unzipped sequences") def _download_ligand_map(self): """Download ligand map from @@ -362,7 +362,7 @@ def _download_ligand_map(self): if not os.path.exists(self.root_dir / self.ligand_map_filename): log.info("Downloading ligand map...") wget.download(self.ligand_map_url, out=str(self.root_dir)) - log.info("Downloaded ligand map") + log.debug("Downloaded ligand map") def _download_source_map(self): """Download source map from @@ -371,7 +371,7 @@ def _download_source_map(self): if not os.path.exists(self.root_dir / self.source_map_filename): log.info("Downloading source map...") wget.download(self.source_map_url, out=str(self.root_dir)) - log.info("Downloaded source map") + log.debug("Downloaded source map") def _download_resolution(self): """Download source map from @@ -380,7 +380,7 @@ def _download_resolution(self): if not os.path.exists(self.root_dir / self.resolution_filename): log.info("Downloading resolution map...") wget.download(self.resolution_url, out=str(self.root_dir)) - log.info("Downloaded resolution map") + log.debug("Downloaded resolution map") def _download_entry_metadata(self): """Download PDB entry metadata from @@ -391,7 +391,7 @@ def _download_entry_metadata(self): ): log.info("Downloading entry metadata...") wget.download(self.pdb_deposition_date_url, out=str(self.root_dir)) - log.info("Downloaded entry metadata") + log.debug("Downloaded entry metadata") def _download_exp_type(self): """Download PDB 
experiment metadata from @@ -400,7 +400,7 @@ def _download_exp_type(self): if not os.path.exists(self.root_dir / self.pdb_entry_type_filename): log.info("Downloading experiment type map...") wget.download(self.pdb_entry_type_url, out=str(self.root_dir)) - log.info("Downloaded experiment type map") + log.debug("Downloaded experiment type map") def _download_pdb_availability(self): """Download PDB availability metadata from @@ -409,7 +409,7 @@ def _download_pdb_availability(self): if not os.path.exists(self.root_dir / self.pdb_availability_filename): log.info("Downloading PDB availability map...") wget.download(self.pdb_availability_url, out=str(self.root_dir)) - log.info("Downloaded PDB availability map") + log.debug("Downloaded PDB availability map") def _parse_ligand_map(self) -> Dict[str, List[str]]: """Parse the ligand maps for all PDB records. @@ -1300,7 +1300,7 @@ def split_clusters( self.split_ratios, self.assign_leftover_rows_to_split_n, ) - log.info("Done splitting clusters") + log.debug("Done splitting clusters") # Update splits for split in self.splits: