From 48b1e005cdc02d0a2abd96bf838900b34c3d23d2 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Wed, 27 Jul 2022 11:50:46 -0400 Subject: [PATCH 1/7] Updated functionality for add_rfree and copy_rfree --- reciprocalspaceship/utils/rfree.py | 37 ++++++++++--- tests/utils/test_rfree.py | 86 ++++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 9 deletions(-) diff --git a/reciprocalspaceship/utils/rfree.py b/reciprocalspaceship/utils/rfree.py index 3516b150..33d8928c 100644 --- a/reciprocalspaceship/utils/rfree.py +++ b/reciprocalspaceship/utils/rfree.py @@ -3,7 +3,7 @@ from reciprocalspaceship.dtypes import MTZIntDtype -def add_rfree(dataset, fraction=0.05, ccp4_convention=False, inplace=False): +def add_rfree(dataset, fraction=0.05, ccp4_convention=False, inplace=False, seed=None): """ Add an r-free flag to the dataset object for refinement. R-free flags are used to identify reflections which are not used in automated refinement routines. @@ -23,6 +23,10 @@ def add_rfree(dataset, fraction=0.05, ccp4_convention=False, inplace=False): 1 is test set, 0 is working set, and key is "R-free-flags". See https://www.ccp4.ac.uk/html/freerflag.html#description for convention details. inplace : bool, optional + seed : int, optional + Seed to be passed to numpy.random.default_rng random number generator + for reproducible r-free flags. If None (default), r-free flags will + different each time. Returns ------- @@ -31,7 +35,9 @@ def add_rfree(dataset, fraction=0.05, ccp4_convention=False, inplace=False): """ if not inplace: dataset = dataset.copy() - test_set = np.random.random(len(dataset)) <= fraction + + rng = np.random.default_rng(seed) + test_set = rng.random(len(dataset)) <= fraction if not ccp4_convention: rfree_key = "R-free-flags" @@ -46,7 +52,7 @@ def add_rfree(dataset, fraction=0.05, ccp4_convention=False, inplace=False): return dataset -def copy_rfree(dataset, dataset_with_rfree, inplace=False): +def copy_rfree(dataset, dataset_with_rfree, inplace=False, custom_rfree_key=None): """ Copy the rfree flag from one dataset object to another. @@ -58,6 +64,10 @@ def copy_rfree(dataset, dataset_with_rfree, inplace=False): A dataset with desired r-free flags. inplace : bool, optional Whether to operate in place or return a copy + custom_rfree_key : str, optional + Name of the column containing rfree flags in dataset_with_rfree. + If None, dataset_with_rfree will be checked for column "R-free-flags" + (phenix convention) then column "FreeR_flag" (ccp4 convention) Returns ------- @@ -66,8 +76,23 @@ def copy_rfree(dataset, dataset_with_rfree, inplace=False): if not inplace: dataset = dataset.copy() - dataset["R-free-flags"] = 0 - dataset["R-free-flags"] = dataset["R-free-flags"].astype(MTZIntDtype()) + if custom_rfree_key is not None: + if custom_rfree_key not in dataset_with_rfree.columns: + raise ValueError( + f"""Supplied dataset_with_rfree contains no column {custom_rfree_key}""" + ) + rfree_key = custom_rfree_key + elif "R-free-flags" in dataset_with_rfree.columns: + rfree_key = "R-free-flags" + elif "FreeR_flag" in dataset_with_rfree.columns: + rfree_key = "FreeR_flag" + else: + raise ValueError( + """Failed to automatically find r-free flags in dataset_with_rfree. Please supply a custom_rfree_key""" + ) + + dataset[rfree_key] = 0 + dataset[rfree_key] = dataset[rfree_key].astype(MTZIntDtype()) idx = dataset.index.intersection(dataset_with_rfree.index) - dataset.loc[idx, "R-free-flags"] = dataset_with_rfree.loc[idx, "R-free-flags"] + dataset.loc[idx, rfree_key] = dataset_with_rfree.loc[idx, rfree_key] return dataset diff --git a/tests/utils/test_rfree.py b/tests/utils/test_rfree.py index df12d7f6..7b1019c8 100644 --- a/tests/utils/test_rfree.py +++ b/tests/utils/test_rfree.py @@ -10,13 +10,18 @@ @pytest.mark.parametrize("fraction", [0.05, 0.10, 0.15]) @pytest.mark.parametrize("ccp4_convention", [False, True]) @pytest.mark.parametrize("inplace", [False, True]) -def test_add_rfree(data_fmodel, fraction, ccp4_convention, inplace): +@pytest.mark.parametrize("seed", [None, 2022]) +def test_add_rfree(data_fmodel, fraction, ccp4_convention, inplace, seed): """ - Test rs.utils.add_rfee + Test rs.utils.add_rfree """ data_copy = data_fmodel.copy() rfree = rs.utils.add_rfree( - data_fmodel, fraction=fraction, ccp4_convention=ccp4_convention, inplace=inplace + data_fmodel, + fraction=fraction, + ccp4_convention=ccp4_convention, + inplace=inplace, + seed=seed, ) if ccp4_convention: @@ -38,8 +43,83 @@ def test_add_rfree(data_fmodel, fraction, ccp4_convention, inplace): assert np.all(data_fmodel == data_copy) assert np.all(data_fmodel == rfree.loc[:, rfree.columns != label_name]) + repeat_rfree = rs.utils.add_rfree( + data_fmodel, + fraction=fraction, + ccp4_convention=ccp4_convention, + inplace=False, + seed=seed, + ) + if seed is not None: + assert np.all(rfree == repeat_rfree) + else: + assert not np.all(rfree == repeat_rfree) + + +@pytest.mark.parametrize("ccp4_convention", [False, True]) +@pytest.mark.parametrize("inplace", [False, True]) +@pytest.mark.parametrize("custom_rfree_key", [None, "custom-rfree-key"]) +def test_copy_rfree(data_fmodel, ccp4_convention, inplace, custom_rfree_key): + """ + Test rs.utils.copy_rfree + """ + data_copy = data_fmodel.copy() + + # create dataset with rfree flags from which to copy + data_with_rfree = rs.utils.add_rfree( + data_fmodel, inplace=False, ccp4_convention=ccp4_convention + ) + + # handle different possible column names for rfree flags + if custom_rfree_key is not None: + if ccp4_convention: + rename_dict = {"FreeR_flag": custom_rfree_key} + else: + rename_dict = {"R-free-flags": custom_rfree_key} + + data_with_rfree.rename(columns=rename_dict, inplace=True) + rfree_key = custom_rfree_key + else: + if ccp4_convention: + rfree_key = "FreeR_flag" + else: + rfree_key = "R-free-flags" + + data_with_copied_rfree = rs.utils.copy_rfree( + data_fmodel, data_with_rfree, inplace=inplace, custom_rfree_key=custom_rfree_key + ) + + if inplace: + assert id(data_with_copied_rfree) == id(data_fmodel) + assert rfree_key in data_fmodel.columns + assert np.array_equal( + data_fmodel[rfree_key].values, data_with_rfree[rfree_key].values + ) + else: + assert id(data_with_copied_rfree) != id(data_fmodel) + assert rfree_key not in data_fmodel.columns + assert np.array_equal( + data_with_copied_rfree[rfree_key].values, data_with_rfree[rfree_key].values + ) + assert np.all(data_fmodel == data_copy) + + +def test_copy_rfree_errors(data_fmodel): + """ + Test expected ValueErrors for rs.utils.copy_rfree + """ + with pytest.raises(ValueError): + rs.utils.copy_rfree(data_fmodel, data_fmodel) + + with pytest.raises(ValueError): + rs.utils.copy_rfree(data_fmodel, data_fmodel, custom_rfree_key="missing key") + class TestRfree(unittest.TestCase): + """ + Test rs.utils.copy_free - legacy version using the unittest framework. + """ + def test_copy_rfree(self): datadir = join(abspath(dirname(__file__)), "../data/fmodel") From 7930bc8bbae371c6cd7a7c7e1d14af0d54cccb71 Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Fri, 29 Jul 2022 09:08:54 -0400 Subject: [PATCH 2/7] Revisions to r-free functionality requested by Jack Greisman - renamed custom_rfree_key to custom_rfree_key - removed obsolete unittest code --- reciprocalspaceship/utils/rfree.py | 11 +++---- tests/utils/test_rfree.py | 51 ++++-------------------------- 2 files changed, 12 insertions(+), 50 deletions(-) diff --git a/reciprocalspaceship/utils/rfree.py b/reciprocalspaceship/utils/rfree.py index 33d8928c..03d78753 100644 --- a/reciprocalspaceship/utils/rfree.py +++ b/reciprocalspaceship/utils/rfree.py @@ -52,7 +52,7 @@ def add_rfree(dataset, fraction=0.05, ccp4_convention=False, inplace=False, seed return dataset -def copy_rfree(dataset, dataset_with_rfree, inplace=False, custom_rfree_key=None): +def copy_rfree(dataset, dataset_with_rfree, inplace=False, rfree_key=None): """ Copy the rfree flag from one dataset object to another. @@ -64,7 +64,7 @@ def copy_rfree(dataset, dataset_with_rfree, inplace=False, custom_rfree_key=None A dataset with desired r-free flags. inplace : bool, optional Whether to operate in place or return a copy - custom_rfree_key : str, optional + rfree_key : str, optional Name of the column containing rfree flags in dataset_with_rfree. If None, dataset_with_rfree will be checked for column "R-free-flags" (phenix convention) then column "FreeR_flag" (ccp4 convention) @@ -76,12 +76,11 @@ def copy_rfree(dataset, dataset_with_rfree, inplace=False, custom_rfree_key=None if not inplace: dataset = dataset.copy() - if custom_rfree_key is not None: - if custom_rfree_key not in dataset_with_rfree.columns: + if rfree_key is not None: + if rfree_key not in dataset_with_rfree.columns: raise ValueError( - f"""Supplied dataset_with_rfree contains no column {custom_rfree_key}""" + f"""Supplied dataset_with_rfree contains no column {rfree_key}""" ) - rfree_key = custom_rfree_key elif "R-free-flags" in dataset_with_rfree.columns: rfree_key = "R-free-flags" elif "FreeR_flag" in dataset_with_rfree.columns: diff --git a/tests/utils/test_rfree.py b/tests/utils/test_rfree.py index 7b1019c8..83da5083 100644 --- a/tests/utils/test_rfree.py +++ b/tests/utils/test_rfree.py @@ -58,8 +58,8 @@ def test_add_rfree(data_fmodel, fraction, ccp4_convention, inplace, seed): @pytest.mark.parametrize("ccp4_convention", [False, True]) @pytest.mark.parametrize("inplace", [False, True]) -@pytest.mark.parametrize("custom_rfree_key", [None, "custom-rfree-key"]) -def test_copy_rfree(data_fmodel, ccp4_convention, inplace, custom_rfree_key): +@pytest.mark.parametrize("rfree_key", [None, "custom-rfree-key"]) +def test_copy_rfree(data_fmodel, ccp4_convention, inplace, rfree_key): """ Test rs.utils.copy_rfree """ @@ -71,14 +71,13 @@ def test_copy_rfree(data_fmodel, ccp4_convention, inplace, custom_rfree_key): ) # handle different possible column names for rfree flags - if custom_rfree_key is not None: + if rfree_key is not None: if ccp4_convention: - rename_dict = {"FreeR_flag": custom_rfree_key} + rename_dict = {"FreeR_flag": rfree_key} else: - rename_dict = {"R-free-flags": custom_rfree_key} + rename_dict = {"R-free-flags": rfree_key} data_with_rfree.rename(columns=rename_dict, inplace=True) - rfree_key = custom_rfree_key else: if ccp4_convention: rfree_key = "FreeR_flag" @@ -86,7 +85,7 @@ def test_copy_rfree(data_fmodel, ccp4_convention, inplace, custom_rfree_key): rfree_key = "R-free-flags" data_with_copied_rfree = rs.utils.copy_rfree( - data_fmodel, data_with_rfree, inplace=inplace, custom_rfree_key=custom_rfree_key + data_fmodel, data_with_rfree, inplace=inplace, rfree_key=rfree_key ) if inplace: @@ -112,40 +111,4 @@ def test_copy_rfree_errors(data_fmodel): rs.utils.copy_rfree(data_fmodel, data_fmodel) with pytest.raises(ValueError): - rs.utils.copy_rfree(data_fmodel, data_fmodel, custom_rfree_key="missing key") - - -class TestRfree(unittest.TestCase): - """ - Test rs.utils.copy_free - legacy version using the unittest framework. - """ - - def test_copy_rfree(self): - - datadir = join(abspath(dirname(__file__)), "../data/fmodel") - data = rs.read_mtz(join(datadir, "9LYZ.mtz")) - data_rfree = rs.utils.add_rfree(data, inplace=False) - - # Test copy of R-free to copy of data - rfree = rs.utils.copy_rfree(data, data_rfree, inplace=False) - self.assertFalse(id(data) == id(rfree)) - self.assertFalse("R-free-flags" in data.columns) - self.assertTrue("R-free-flags" in rfree.columns) - self.assertTrue( - np.array_equal( - rfree["R-free-flags"].values, data_rfree["R-free-flags"].values - ) - ) - - # Test copy of R-free inplace - rfree = rs.utils.copy_rfree(data, data_rfree, inplace=True) - self.assertTrue(id(data) == id(rfree)) - self.assertTrue("R-free-flags" in data.columns) - self.assertTrue("R-free-flags" in rfree.columns) - self.assertTrue( - np.array_equal( - rfree["R-free-flags"].values, data_rfree["R-free-flags"].values - ) - ) - - return + rs.utils.copy_rfree(data_fmodel, data_fmodel, rfree_key="missing key") From 6a11ec38c1fd89180df5d822fd47dcb1afb52a9b Mon Sep 17 00:00:00 2001 From: Dennis Brookner Date: Fri, 29 Jul 2022 18:00:31 -0400 Subject: [PATCH 3/7] Fix text of ValueError ValueError still referred to the argument as "custom_rfree_key" rather than "rfree_key" --- reciprocalspaceship/utils/rfree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reciprocalspaceship/utils/rfree.py b/reciprocalspaceship/utils/rfree.py index 03d78753..9aa6e1b3 100644 --- a/reciprocalspaceship/utils/rfree.py +++ b/reciprocalspaceship/utils/rfree.py @@ -87,7 +87,7 @@ def copy_rfree(dataset, dataset_with_rfree, inplace=False, rfree_key=None): rfree_key = "FreeR_flag" else: raise ValueError( - """Failed to automatically find r-free flags in dataset_with_rfree. Please supply a custom_rfree_key""" + """Failed to automatically find r-free flags in dataset_with_rfree. Please supply an rfree_key""" ) dataset[rfree_key] = 0 From 9bb78b26d6936ae16e7b76e5a50fdf894cfeb45d Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Fri, 29 Jul 2022 18:08:13 -0400 Subject: [PATCH 4/7] Condensed test_copy_rfree_errors --- tests/utils/test_rfree.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/utils/test_rfree.py b/tests/utils/test_rfree.py index 83da5083..6d15a5c5 100644 --- a/tests/utils/test_rfree.py +++ b/tests/utils/test_rfree.py @@ -103,12 +103,17 @@ def test_copy_rfree(data_fmodel, ccp4_convention, inplace, rfree_key): assert np.all(data_fmodel == data_copy) -def test_copy_rfree_errors(data_fmodel): +@pytest.mark.parametrize("rfree_key", [None, "missing key"]) +def test_copy_rfree_errors(data_fmodel, rfree_key): """ Test expected ValueErrors for rs.utils.copy_rfree - """ - with pytest.raises(ValueError): - rs.utils.copy_rfree(data_fmodel, data_fmodel) + When rfree_key=None, copy_rfree searches for columns named + "R-free-flags" and "FreeR_flag", and throws a ValueError when neither + is found + + When rfree_key="missing key", copy_rfree throws a ValueError because + there is no column "missing key" + """ with pytest.raises(ValueError): - rs.utils.copy_rfree(data_fmodel, data_fmodel, rfree_key="missing key") + rs.utils.copy_rfree(data_fmodel, data_fmodel, rfree_key=rfree_key) From 11c65a5cad602ce5ac4c973d4231104a4a18a69b Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Sat, 30 Jul 2022 11:33:16 -0400 Subject: [PATCH 5/7] Make test_copy_rfree_errors more rigorous --- tests/utils/test_rfree.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/utils/test_rfree.py b/tests/utils/test_rfree.py index 6d15a5c5..436d17b2 100644 --- a/tests/utils/test_rfree.py +++ b/tests/utils/test_rfree.py @@ -1,3 +1,4 @@ +from multiprocessing.sharedctypes import Value import unittest from os.path import abspath, dirname, join @@ -103,17 +104,18 @@ def test_copy_rfree(data_fmodel, ccp4_convention, inplace, rfree_key): assert np.all(data_fmodel == data_copy) -@pytest.mark.parametrize("rfree_key", [None, "missing key"]) -def test_copy_rfree_errors(data_fmodel, rfree_key): +def test_copy_rfree_errors(data_fmodel): """ Test expected ValueErrors for rs.utils.copy_rfree - - When rfree_key=None, copy_rfree searches for columns named - "R-free-flags" and "FreeR_flag", and throws a ValueError when neither - is found - - When rfree_key="missing key", copy_rfree throws a ValueError because - there is no column "missing key" """ + # Raise ValueError because "R-free-flags" and "FreeR_flag" are missing with pytest.raises(ValueError): - rs.utils.copy_rfree(data_fmodel, data_fmodel, rfree_key=rfree_key) + rs.utils.copy_rfree(data_fmodel, data_fmodel) + + # Raise ValueError because "missing key" is missing, + # even though "R-free-flags" is present + data_with_standard_rfree = rs.utils.add_rfree(data_fmodel, inplace=False) + with pytest.raises(ValueError): + rs.utils.copy_rfree( + data_fmodel, data_with_standard_rfree, rfree_key="missing key" + ) From 8435444feca40eef5f1ca90d0a3779fc2b2309f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 30 Jul 2022 15:33:40 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/utils/test_rfree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_rfree.py b/tests/utils/test_rfree.py index 436d17b2..5f725cfd 100644 --- a/tests/utils/test_rfree.py +++ b/tests/utils/test_rfree.py @@ -1,5 +1,5 @@ -from multiprocessing.sharedctypes import Value import unittest +from multiprocessing.sharedctypes import Value from os.path import abspath, dirname, join import numpy as np From 2ffdaebcbd1a53353676e7d6a8eba039ca3d405e Mon Sep 17 00:00:00 2001 From: dennisbrookner Date: Sat, 30 Jul 2022 11:35:51 -0400 Subject: [PATCH 7/7] Remove unused import statements --- tests/utils/test_rfree.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/utils/test_rfree.py b/tests/utils/test_rfree.py index 436d17b2..2b4d963f 100644 --- a/tests/utils/test_rfree.py +++ b/tests/utils/test_rfree.py @@ -1,7 +1,3 @@ -from multiprocessing.sharedctypes import Value -import unittest -from os.path import abspath, dirname, join - import numpy as np import pytest