From 19accbf2034e7c2e3f4c5d8a89bcdc969c983d53 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Thu, 15 Aug 2024 11:02:36 +0200 Subject: [PATCH 1/7] hotfix greater than/less than operations in pdb_manager --- graphein/ml/datasets/pdb_data.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/graphein/ml/datasets/pdb_data.py b/graphein/ml/datasets/pdb_data.py index acd6716a..5843b51c 100644 --- a/graphein/ml/datasets/pdb_data.py +++ b/graphein/ml/datasets/pdb_data.py @@ -120,7 +120,6 @@ def __init__( ).name self.list_columns = ["ligands"] - self.labels = labels # Data self.download_metadata() @@ -166,10 +165,9 @@ def download_metadata(self): self._download_entry_metadata() self._download_exp_type() self._download_pdb_availability() - if self.labels: - self._download_pdb_chain_cath_uniprot_map() - self._download_cath_id_cath_code_map() - self._download_pdb_chain_ec_number_map() + self._download_pdb_chain_cath_uniprot_map() + self._download_cath_id_cath_code_map() + self._download_pdb_chain_ec_number_map() def get_unavailable_pdb_files( self, splits: Optional[List[str]] = None @@ -645,12 +643,15 @@ def _parse_cath_code(self) -> Dict[str, str]: with gzip.open( self.root_dir / self.cath_id_cath_code_filename, "rt" ) as f: + print(f) for line in f: + print(line) try: cath_id, cath_version, cath_code, cath_segment = ( line.strip().split() ) cath_mapping[cath_id] = cath_code + print(cath_id, cath_code) except ValueError: continue return cath_mapping @@ -1085,7 +1086,10 @@ def oligomeric( update: bool = False, ) -> pd.DataFrame: """Select molecules with a given oligmeric length. - I.e. ``df.n_chains ==/ oligomer`` + I.e. ``df.n_chains ==/ =< / >= oligomer`` + + N.b. the `comparison` arguments for `"greater"` and `"less"` are + `>=` and `=<` respectively. :param length: Oligomeric length of molecule, defaults to ``1``. :type length: int @@ -1106,9 +1110,9 @@ def oligomeric( if comparison == "equal": df = splits_df.loc[splits_df.n_chains == oligomer] elif comparison == "less": - df = splits_df.loc[splits_df.n_chains < oligomer] + df = splits_df.loc[splits_df.n_chains <= oligomer] elif comparison == "greater": - df = splits_df.loc[splits_df.n_chains > oligomer] + df = splits_df.loc[splits_df.n_chains >= oligomer] else: raise ValueError( "Comparison must be one of 'equal', 'less', or 'greater'." From 18a46b43bfaea35dadf3c796bfaac207b2b352d1 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Thu, 15 Aug 2024 11:04:28 +0200 Subject: [PATCH 2/7] bump changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1b66669..30329a5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ ### 1.7.7 - 03/08/2024 #### Bugfixes - +* Hotfix greater than/less than operations in PDBManager oligmer selection to include equality. [#408](https://github.com/a-r-j/graphein/pull/408). * Fixes progress bar for `download_pdb_multiprocessing`. [#394](https://github.com/a-r-j/graphein/pull/394) * Add support for DSSP >4. Backwards compatibility is still supported. [#355](https://github.com/a-r-j/graphein/pull/355). Fixes [#353](https://github.com/a-r-j/graphein/issues/353). * Fixes bug where RSA features are missing from nodes with insertion codes. [#355](https://github.com/a-r-j/graphein/pull/355). Fixes [#354](https://github.com/a-r-j/graphein/issues/353). From 77f2b386b2aae5572e1078bb3b3576dd29cd0e76 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 16 Sep 2024 14:25:00 +0200 Subject: [PATCH 3/7] md formatting --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30329a5c..abc1d5a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ### 1.7.7 - 03/08/2024 #### Bugfixes + * Hotfix greater than/less than operations in PDBManager oligmer selection to include equality. [#408](https://github.com/a-r-j/graphein/pull/408). * Fixes progress bar for `download_pdb_multiprocessing`. [#394](https://github.com/a-r-j/graphein/pull/394) * Add support for DSSP >4. Backwards compatibility is still supported. [#355](https://github.com/a-r-j/graphein/pull/355). Fixes [#353](https://github.com/a-r-j/graphein/issues/353). From c7807348e93a5f6023198dfbbf29e243ca58c603 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 16 Sep 2024 15:27:30 +0200 Subject: [PATCH 4/7] pin numpy <2 --- .requirements/base.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.requirements/base.in b/.requirements/base.in index 185bd1f2..a35d1487 100644 --- a/.requirements/base.in +++ b/.requirements/base.in @@ -10,7 +10,7 @@ looseversion matplotlib>=3.4.3 multipledispatch networkx -numpy +numpy<2 pandas plotly pydantic From bc21b9cf49d95010aef2698f6712e5575c095526 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Mon, 16 Sep 2024 18:26:36 +0200 Subject: [PATCH 5/7] increase test tolerance --- tests/protein/tensor/test_angles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/protein/tensor/test_angles.py b/tests/protein/tensor/test_angles.py index 04e4a2b1..a6fb4a53 100644 --- a/tests/protein/tensor/test_angles.py +++ b/tests/protein/tensor/test_angles.py @@ -86,7 +86,7 @@ def test_torsion_to_rad(): delta = ((delta + 2 * np.pi) / np.pi) % 2 np.testing.assert_allclose( - delta, torch.zeros_like(delta), atol=1e-4, rtol=1e-4 + delta, torch.zeros_like(delta), atol=1e-3, rtol=1e-3 ) From ce134021fcaf6a3046b32e5062db7659c6a584c6 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Tue, 17 Sep 2024 13:37:56 +0200 Subject: [PATCH 6/7] relax test tolerance --- tests/protein/tensor/test_angles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/protein/tensor/test_angles.py b/tests/protein/tensor/test_angles.py index a6fb4a53..2f56b385 100644 --- a/tests/protein/tensor/test_angles.py +++ b/tests/protein/tensor/test_angles.py @@ -126,7 +126,7 @@ def test_dihedrals_to_rad(): delta[delta.nonzero()] = torch.abs(delta[torch.nonzero(delta)] - 2 * np.pi) delta = ((delta + 2 * np.pi) / np.pi) % 2 - np.testing.assert_allclose(delta, torch.zeros_like(delta), atol=1e-5) + np.testing.assert_allclose(delta, torch.zeros_like(delta), atol=1e-4, rtol=1e-4) @pytest.mark.skipif(not TORCH_AVAIL, reason="PyTorch not available") From 60cc148c8ba2b6c962e7fb9158da8f82e34c9bb9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Sep 2024 11:38:21 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/protein/tensor/test_angles.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/protein/tensor/test_angles.py b/tests/protein/tensor/test_angles.py index 2f56b385..fe658d8e 100644 --- a/tests/protein/tensor/test_angles.py +++ b/tests/protein/tensor/test_angles.py @@ -126,7 +126,9 @@ def test_dihedrals_to_rad(): delta[delta.nonzero()] = torch.abs(delta[torch.nonzero(delta)] - 2 * np.pi) delta = ((delta + 2 * np.pi) / np.pi) % 2 - np.testing.assert_allclose(delta, torch.zeros_like(delta), atol=1e-4, rtol=1e-4) + np.testing.assert_allclose( + delta, torch.zeros_like(delta), atol=1e-4, rtol=1e-4 + ) @pytest.mark.skipif(not TORCH_AVAIL, reason="PyTorch not available")