Reformat with black

theochem · Oct 6, 2024 · 9ad3a0d · 9ad3a0d
1 parent 3e39875
commit 9ad3a0d
Show file tree

Hide file tree

Showing 6 changed files with 61 additions and 54 deletions.
diff --git a/selector/methods/base.py b/selector/methods/base.py
@@ -34,12 +34,13 @@
 class SelectionBase(ABC):
     """Base class for selecting subset of sample points."""
 
-    def select(self,
-               x: np.ndarray,
-               size: int,
-               labels: np.ndarray = None,
-               proportional_selection: bool = True,
-               ) -> list:
+    def select(
+        self,
+        x: np.ndarray,
+        size: int,
+        labels: np.ndarray = None,
+        proportional_selection: bool = True,
+    ) -> list:
         """Return indices representing subset of sample points.
 
         Parameters
@@ -130,7 +131,7 @@ def select(self,
             # (pop < size_each_cluster) and needs to be done iteratively until all remaining clusters
             # have at least size_each_cluster samples
             while np.any(
-                    [value <= size_each_cluster for value in pop_clusters.values() if value != 0]
+                [value <= size_each_cluster for value in pop_clusters.values() if value != 0]
             ):
                 for unique_label in unique_labels:
                     if pop_clusters[unique_label] != 0:
@@ -143,7 +144,8 @@ def select(self,
                 # update number of samples to be selected from each cluster
                 totally_used_clusters = list(pop_clusters.values()).count(0)
                 size_each_cluster = (size - len(np.hstack(selected_ids))) // (
-                            num_clusters - totally_used_clusters)
+                    num_clusters - totally_used_clusters
+                )
 
                 warnings.warn(
                     f"Number of molecules in one cluster is less than"
@@ -165,7 +167,7 @@ def select(self,
 
     @abstractmethod
     def select_from_cluster(
-            self, x: np.ndarray, size: int, labels: np.ndarray = None
+        self, x: np.ndarray, size: int, labels: np.ndarray = None
     ) -> np.ndarray:
         """Return indices representing subset of sample points from one cluster.
 

diff --git a/selector/methods/distance.py b/selector/methods/distance.py
@@ -459,9 +459,7 @@ class DISE(SelectionBase):
 
     """
 
-    def __init__(
-        self, r0=None, ref_index=None, tol=0.05, n_iter=10, p=2.0, eps=0.0, fun_dist=None
-    ):
+    def __init__(self, r0=None, ref_index=None, tol=0.05, n_iter=10, p=2.0, eps=0.0, fun_dist=None):
         """
         Initialize class.
 

diff --git a/selector/methods/partition.py b/selector/methods/partition.py
@@ -666,4 +666,3 @@ def select_from_cluster(self, arr, num_selected, cluster_ids=None):
                 )
             count += 1
         return selected
-
diff --git a/selector/methods/tests/common.py b/selector/methods/tests/common.py
@@ -43,14 +43,17 @@ def generate_synthetic_cluster_data():
     # generate the second cluster with 6 points
     cluster_two = np.array([[3, 0], [3, 1], [3, 2], [3, 3], [3, 4], [3, 5]])
     # generate the third cluster with 9 points
-    cluster_three = np.array([[6, 0], [6, 1], [6, 2], [6, 3], [6, 4], [6, 5], [6, 6], [6, 7], [6, 8]])
+    cluster_three = np.array(
+        [[6, 0], [6, 1], [6, 2], [6, 3], [6, 4], [6, 5], [6, 6], [6, 7], [6, 8]]
+    )
     # concatenate the clusters
     coords = np.vstack([cluster_one, cluster_two, cluster_three])
     # generate the labels
     labels = np.hstack([[0 for _ in range(3)], [1 for _ in range(6)], [2 for _ in range(9)]])
 
     return coords, labels, cluster_one, cluster_two, cluster_three
 
+
 def generate_synthetic_data(
     n_samples: int = 100,
     n_features: int = 2,
@@ -136,8 +139,6 @@ def get_data_file_path(file_name):
         The absolute path of the data file inside the package
 
     """
-    data_file_path = resources.files("selector.methods.tests").joinpath(
-        f"data/{file_name}"
-    )
+    data_file_path = resources.files("selector.methods.tests").joinpath(f"data/{file_name}")
 
     return data_file_path
diff --git a/selector/methods/tests/test_distance.py b/selector/methods/tests/test_distance.py
@@ -141,20 +141,22 @@ def test_maxmin():
     )
     assert_equal(selected_mocked, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 15, 10, 13, 9, 18])
 
+
 def test_maxmin_proportional_selection():
     """Test MaxMin class with proportional selection."""
     # generate the synthetic data: coordinates, labels, and the three clusters
     coords, labels, cluster_one, cluster_two, cluster_three = generate_synthetic_cluster_data()
     # instantiate the MaxMin class
     collector = MaxMin(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
     # select 6 points with proportional selection from each cluster
-    selected_ids = collector.select(coords,
-                                    size=6,
-                                    labels=labels,
-                                    proportional_selection=True,
-                                    )
+    selected_ids = collector.select(
+        coords,
+        size=6,
+        labels=labels,
+        proportional_selection=True,
+    )
     # make sure all the selected indices are the same with expectation
-    assert_equal(selected_ids,[0, 3, 8, 9, 17, 13])
+    assert_equal(selected_ids, [0, 3, 8, 9, 17, 13])
     # check how many points are selected from each cluster
     assert_equal(len(selected_ids), 6)
     # check the number of points selected from cluster one
@@ -177,14 +179,15 @@ def test_maxmin_proportional_selection_imbalance_1():
     # instantiate the MaxMin class
     collector = MaxMin(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
     # select 9 points with proportional selection from each cluster
-    selected_ids = collector.select(coords,
-                                    size=9,
-                                    labels=labels,
-                                    proportional_selection=True,
-                                    )
+    selected_ids = collector.select(
+        coords,
+        size=9,
+        labels=labels,
+        proportional_selection=True,
+    )
 
     # make sure all the selected indices are the same with expectation
-    assert_equal(selected_ids,[0, 2, 6, 12, 15, 38, 16, 41, 36])
+    assert_equal(selected_ids, [0, 2, 6, 12, 15, 38, 16, 41, 36])
     # check how many points are selected from each cluster
     assert_equal(len(selected_ids), 9)
     # check the number of points selected from cluster one
@@ -207,14 +210,15 @@ def test_maxmin_proportional_selection_imbalance_2():
     # instantiate the MaxMin class
     collector = MaxMin(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
     # select 14 points with proportional selection from each cluster
-    selected_ids = collector.select(coords,
-                                    size=14,
-                                    labels=labels,
-                                    proportional_selection=True,
-                                    )
+    selected_ids = collector.select(
+        coords,
+        size=14,
+        labels=labels,
+        proportional_selection=True,
+    )
 
     # make sure all the selected indices match the expected ones
-    assert_equal(selected_ids,[0, 3, 9, 6, 14, 36, 53, 17, 44, 23, 28, 50, 52, 49])
+    assert_equal(selected_ids, [0, 3, 9, 6, 14, 36, 53, 17, 44, 23, 28, 50, 52, 49])
     print(f"selected_ids: {selected_ids}")
     # check how many points are selected from each cluster
     assert_equal(len(selected_ids), 14)
@@ -355,13 +359,14 @@ def test_maxsum_proportional_selection():
     # instantiate the MaxSum class
     collector = MaxSum(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
     # select 6 points with proportional selection from each cluster
-    selected_ids = collector.select(coords,
-                                    size=6,
-                                    labels=labels,
-                                    proportional_selection=True,
-                                    )
+    selected_ids = collector.select(
+        coords,
+        size=6,
+        labels=labels,
+        proportional_selection=True,
+    )
     # make sure all the selected indices are the same with expectation
-    assert_equal(selected_ids,[0, 3, 8, 9, 17, 10])
+    assert_equal(selected_ids, [0, 3, 8, 9, 17, 10])
     # check how many points are selected from each cluster
     assert_equal(len(selected_ids), 6)
     # check the number of points selected from cluster one
@@ -426,13 +431,14 @@ def test_optisim_proportional_selection():
     # instantiate the Optisim class
     collector = OptiSim(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
     # select 6 points with proportional selection from each cluster
-    selected_ids = collector.select(coords,
-                                    size=6,
-                                    labels=labels,
-                                    proportional_selection=True,
-                                    )
+    selected_ids = collector.select(
+        coords,
+        size=6,
+        labels=labels,
+        proportional_selection=True,
+    )
     # make sure all the selected indices are the same with expectation
-    assert_equal(selected_ids,[0, 3, 8, 9, 17, 13])
+    assert_equal(selected_ids, [0, 3, 8, 9, 17, 13])
     # check how many points are selected from each cluster
     assert_equal(len(selected_ids), 6)
     # check the number of points selected from cluster one
@@ -442,6 +448,7 @@ def test_optisim_proportional_selection():
     # check the number of points selected from cluster three
     assert_equal((labels[selected_ids] == 2).sum(), 3)
 
+
 def test_directed_sphere_size_error():
     """Test DirectedSphereExclusion error when too many points requested."""
     x = np.array([[1, 9]] * 100)
@@ -552,13 +559,14 @@ def test_directed_sphere_proportional_selection():
     # instantiate the DISE class
     collector = DISE(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
     # select 6 points with proportional selection from each cluster
-    selected_ids = collector.select(coords,
-                                    size=6,
-                                    labels=labels,
-                                    proportional_selection=True,
-                                    )
+    selected_ids = collector.select(
+        coords,
+        size=6,
+        labels=labels,
+        proportional_selection=True,
+    )
     # make sure all the selected indices are the same with expectation
-    assert_equal(selected_ids,[0, 3, 7, 9, 12, 15])
+    assert_equal(selected_ids, [0, 3, 7, 9, 12, 15])
     # check how many points are selected from each cluster
     assert_equal(len(selected_ids), 6)
     # check the number of points selected from cluster one

diff --git a/selector/methods/tests/test_partition.py b/selector/methods/tests/test_partition.py
@@ -183,4 +183,3 @@ def test_medoid():
     selector = Medoid()
     selected_ids = selector.select(features, size=2)
     assert_equal(selected_ids, [0, 3])
-