Skip to content

Commit

Permalink
Reformat with black
Browse files Browse the repository at this point in the history
  • Loading branch information
FanwangM committed Oct 6, 2024
1 parent 3e39875 commit 9ad3a0d
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 54 deletions.
20 changes: 11 additions & 9 deletions selector/methods/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@
class SelectionBase(ABC):
"""Base class for selecting subset of sample points."""

def select(self,
x: np.ndarray,
size: int,
labels: np.ndarray = None,
proportional_selection: bool = True,
) -> list:
def select(
self,
x: np.ndarray,
size: int,
labels: np.ndarray = None,
proportional_selection: bool = True,
) -> list:
"""Return indices representing subset of sample points.
Parameters
Expand Down Expand Up @@ -130,7 +131,7 @@ def select(self,
# (pop < size_each_cluster) and needs to be done iteratively until all remaining clusters
# have at least size_each_cluster samples
while np.any(
[value <= size_each_cluster for value in pop_clusters.values() if value != 0]
[value <= size_each_cluster for value in pop_clusters.values() if value != 0]
):
for unique_label in unique_labels:
if pop_clusters[unique_label] != 0:
Expand All @@ -143,7 +144,8 @@ def select(self,
# update number of samples to be selected from each cluster
totally_used_clusters = list(pop_clusters.values()).count(0)
size_each_cluster = (size - len(np.hstack(selected_ids))) // (
num_clusters - totally_used_clusters)
num_clusters - totally_used_clusters
)

warnings.warn(
f"Number of molecules in one cluster is less than"
Expand All @@ -165,7 +167,7 @@ def select(self,

@abstractmethod
def select_from_cluster(
self, x: np.ndarray, size: int, labels: np.ndarray = None
self, x: np.ndarray, size: int, labels: np.ndarray = None
) -> np.ndarray:
"""Return indices representing subset of sample points from one cluster.
Expand Down
4 changes: 1 addition & 3 deletions selector/methods/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,9 +459,7 @@ class DISE(SelectionBase):
"""

def __init__(
self, r0=None, ref_index=None, tol=0.05, n_iter=10, p=2.0, eps=0.0, fun_dist=None
):
def __init__(self, r0=None, ref_index=None, tol=0.05, n_iter=10, p=2.0, eps=0.0, fun_dist=None):
"""
Initialize class.
Expand Down
1 change: 0 additions & 1 deletion selector/methods/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,4 +666,3 @@ def select_from_cluster(self, arr, num_selected, cluster_ids=None):
)
count += 1
return selected

9 changes: 5 additions & 4 deletions selector/methods/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,17 @@ def generate_synthetic_cluster_data():
# generate the second cluster with 6 points
cluster_two = np.array([[3, 0], [3, 1], [3, 2], [3, 3], [3, 4], [3, 5]])
# generate the third cluster with 9 points
cluster_three = np.array([[6, 0], [6, 1], [6, 2], [6, 3], [6, 4], [6, 5], [6, 6], [6, 7], [6, 8]])
cluster_three = np.array(
[[6, 0], [6, 1], [6, 2], [6, 3], [6, 4], [6, 5], [6, 6], [6, 7], [6, 8]]
)
# concatenate the clusters
coords = np.vstack([cluster_one, cluster_two, cluster_three])
# generate the labels
labels = np.hstack([[0 for _ in range(3)], [1 for _ in range(6)], [2 for _ in range(9)]])

return coords, labels, cluster_one, cluster_two, cluster_three


def generate_synthetic_data(
n_samples: int = 100,
n_features: int = 2,
Expand Down Expand Up @@ -136,8 +139,6 @@ def get_data_file_path(file_name):
The absolute path of the data file inside the package
"""
data_file_path = resources.files("selector.methods.tests").joinpath(
f"data/{file_name}"
)
data_file_path = resources.files("selector.methods.tests").joinpath(f"data/{file_name}")

return data_file_path
80 changes: 44 additions & 36 deletions selector/methods/tests/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,20 +141,22 @@ def test_maxmin():
)
assert_equal(selected_mocked, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 15, 10, 13, 9, 18])


def test_maxmin_proportional_selection():
"""Test MaxMin class with proportional selection."""
# generate the synthetic data set consisting of three clusters
coords, labels, cluster_one, cluster_two, cluster_three = generate_synthetic_cluster_data()
# instantiate the MaxMin class
collector = MaxMin(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
# select 6 points with proportional selection from each cluster
selected_ids = collector.select(coords,
size=6,
labels=labels,
proportional_selection=True,
)
selected_ids = collector.select(
coords,
size=6,
labels=labels,
proportional_selection=True,
)
# make sure all the selected indices are the same with expectation
assert_equal(selected_ids,[0, 3, 8, 9, 17, 13])
assert_equal(selected_ids, [0, 3, 8, 9, 17, 13])
# check the total number of selected points
assert_equal(len(selected_ids), 6)
# check the number of points selected from cluster one
Expand All @@ -177,14 +179,15 @@ def test_maxmin_proportional_selection_imbalance_1():
# instantiate the MaxMin class
collector = MaxMin(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
# select 9 points with proportional selection from each cluster
selected_ids = collector.select(coords,
size=9,
labels=labels,
proportional_selection=True,
)
selected_ids = collector.select(
coords,
size=9,
labels=labels,
proportional_selection=True,
)

# make sure all the selected indices are the same with expectation
assert_equal(selected_ids,[0, 2, 6, 12, 15, 38, 16, 41, 36])
assert_equal(selected_ids, [0, 2, 6, 12, 15, 38, 16, 41, 36])
# check how many points are selected from each cluster
assert_equal(len(selected_ids), 9)
# check the number of points selected from cluster one
Expand All @@ -207,14 +210,15 @@ def test_maxmin_proportional_selection_imbalance_2():
# instantiate the MaxMin class
collector = MaxMin(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
# select 14 points with proportional selection from each cluster
selected_ids = collector.select(coords,
size=14,
labels=labels,
proportional_selection=True,
)
selected_ids = collector.select(
coords,
size=14,
labels=labels,
proportional_selection=True,
)

# make sure all the selected indices match the expected values
assert_equal(selected_ids,[0, 3, 9, 6, 14, 36, 53, 17, 44, 23, 28, 50, 52, 49])
assert_equal(selected_ids, [0, 3, 9, 6, 14, 36, 53, 17, 44, 23, 28, 50, 52, 49])
print(f"selected_ids: {selected_ids}")
# check how many points are selected from each cluster
assert_equal(len(selected_ids), 14)
Expand Down Expand Up @@ -355,13 +359,14 @@ def test_maxsum_proportional_selection():
# instantiate the MaxSum class
collector = MaxSum(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
# select 6 points with proportional selection from each cluster
selected_ids = collector.select(coords,
size=6,
labels=labels,
proportional_selection=True,
)
selected_ids = collector.select(
coords,
size=6,
labels=labels,
proportional_selection=True,
)
# make sure all the selected indices are the same with expectation
assert_equal(selected_ids,[0, 3, 8, 9, 17, 10])
assert_equal(selected_ids, [0, 3, 8, 9, 17, 10])
# check how many points are selected from each cluster
assert_equal(len(selected_ids), 6)
# check the number of points selected from cluster one
Expand Down Expand Up @@ -426,13 +431,14 @@ def test_optisim_proportional_selection():
# instantiate the Optisim class
collector = OptiSim(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
# select 6 points with proportional selection from each cluster
selected_ids = collector.select(coords,
size=6,
labels=labels,
proportional_selection=True,
)
selected_ids = collector.select(
coords,
size=6,
labels=labels,
proportional_selection=True,
)
# make sure all the selected indices are the same with expectation
assert_equal(selected_ids,[0, 3, 8, 9, 17, 13])
assert_equal(selected_ids, [0, 3, 8, 9, 17, 13])
# check how many points are selected from each cluster
assert_equal(len(selected_ids), 6)
# check the number of points selected from cluster one
Expand All @@ -442,6 +448,7 @@ def test_optisim_proportional_selection():
# check the number of points selected from cluster three
assert_equal((labels[selected_ids] == 2).sum(), 3)


def test_directed_sphere_size_error():
"""Test DirectedSphereExclusion error when too many points requested."""
x = np.array([[1, 9]] * 100)
Expand Down Expand Up @@ -552,13 +559,14 @@ def test_directed_sphere_proportional_selection():
# instantiate the DISE class
collector = DISE(fun_dist=lambda x: pairwise_distances(x, metric="euclidean"), ref_index=0)
# select 6 points with proportional selection from each cluster
selected_ids = collector.select(coords,
size=6,
labels=labels,
proportional_selection=True,
)
selected_ids = collector.select(
coords,
size=6,
labels=labels,
proportional_selection=True,
)
# make sure all the selected indices are the same with expectation
assert_equal(selected_ids,[0, 3, 7, 9, 12, 15])
assert_equal(selected_ids, [0, 3, 7, 9, 12, 15])
# check how many points are selected from each cluster
assert_equal(len(selected_ids), 6)
# check the number of points selected from cluster one
Expand Down
1 change: 0 additions & 1 deletion selector/methods/tests/test_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,3 @@ def test_medoid():
selector = Medoid()
selected_ids = selector.select(features, size=2)
assert_equal(selected_ids, [0, 3])

0 comments on commit 9ad3a0d

Please sign in to comment.