From d7e18cbea7abb9479e834af1af95c394fbfb1792 Mon Sep 17 00:00:00 2001
From: gwaygenomics <gregory.way@gmail.com>
Date: Tue, 20 Aug 2019 13:54:44 -0400
Subject: [PATCH 1/2] all a coercable string as input to subsample_n

---
 pycytominer/aggregate.py | 44 +++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/pycytominer/aggregate.py b/pycytominer/aggregate.py
index 3be4dbe2..ef145666 100644
--- a/pycytominer/aggregate.py
+++ b/pycytominer/aggregate.py
@@ -55,11 +55,6 @@ def __init__(
             0 < subsample_frac and 1 >= subsample_frac
         ), "subsample_frac must be between 0 and 1"
 
-        # Check that the user didn't specify both subset frac and
-        assert (
-            subsample_frac == 1 or subsample_n == "all"
-        ), "Do not set both subsample_frac and subsample_n"
-
         self.sql_file = sql_file
         self.strata = strata
         self.features = features
@@ -73,9 +68,16 @@ def __init__(
         self.subsampling_random_state = subsampling_random_state
         self.is_aggregated = False
 
+        if self.subsample_n != "all":
+            try:
+                self.subsample_n = int(self.subsample_n)
+            except ValueError:
+                print("subsample n must be an integer or coercable")
+
         # Connect to sqlite engine
         self.engine = create_engine(self.sql_file)
         self.conn = self.engine.connect()
+        self._check_subsampling()
 
         if load_image_data:
             self.load_image()
@@ -90,16 +92,22 @@ def _check_compartments(self, compartments):
         elif isinstance(compartments, str):
             assert compartments in valid_compartments, error_str
 
+    def _check_subsampling(self):
+        # Check that the user didn't specify both subset frac and
+        assert (
+            self.subsample_frac == 1 or self.subsample_n == "all"
+        ), "Do not set both subsample_frac and subsample_n"
+
     def set_output_file(self, output_file):
         self.output_file = output_file
 
     def set_subsample_frac(self, subsample_frac):
-        self.subsample_n = "all"
         self.subsample_frac = subsample_frac
+        self._check_subsampling()
 
     def set_subsample_n(self, subsample_n):
-        self.subsample_frac = 1
         self.subsample_n = subsample_n
+        self._check_subsampling()
 
     def set_subsample_random_state(self, random_state):
         self.subsampling_random_state = random_state
@@ -109,9 +117,7 @@ def load_image(self):
         Load image table from sqlite file
         """
         # Extract image metadata
-        image_cols = (
-            "TableNumber, ImageNumber, {}".format(", ".join(self.strata))
-        )
+        image_cols = "TableNumber, ImageNumber, {}".format(", ".join(self.strata))
         image_query = "select {} from image".format(image_cols)
         self.image_df = pd.read_sql(sql=image_query, con=self.conn)
 
@@ -166,7 +172,7 @@ def subsample_profiles(self, x, random_state="none"):
                 x, frac=self.subsample_frac, random_state=self.subsampling_random_state
             )
 
-    def get_subsample(self, compartment="cells", subsample_frac=1, subsample_n="all"):
+    def get_subsample(self, compartment="cells"):
         """
         Extract subsample from sqlite file
 
@@ -175,12 +181,6 @@ def get_subsample(self, compartment="cells", subsample_frac=1, subsample_n="all"
         """
         self._check_compartments(compartment)
 
-        if subsample_frac < 1:
-            self.set_subsample_frac(subsample_frac)
-
-        if isinstance(subsample_n, int):
-            self.set_subsample_n(subsample_n)
-
         query_cols = "TableNumber, ImageNumber, ObjectNumber"
         query = "select {} from {}".format(query_cols, compartment)
 
@@ -301,15 +301,9 @@ def aggregate(
     population_df = population_df.groupby(strata)
 
     if operation == "median":
-        population_df = (
-            population_df.median()
-            .reset_index()
-        )
+        population_df = population_df.median().reset_index()
     else:
-        population_df = (
-            population_df.mean()
-            .reset_index()
-        )
+        population_df = population_df.mean().reset_index()
 
     # Aggregated image number and object number do not make sense
     for col in ["ImageNumber", "ObjectNumber"]:

From 8d07f70e4ba83a673cef198cecf824e0e961b7be Mon Sep 17 00:00:00 2001
From: gwaygenomics <gregory.way@gmail.com>
Date: Tue, 20 Aug 2019 13:58:54 -0400
Subject: [PATCH 2/2] closes #28

---
 pycytominer/aggregate.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pycytominer/aggregate.py b/pycytominer/aggregate.py
index ef145666..e9122383 100644
--- a/pycytominer/aggregate.py
+++ b/pycytominer/aggregate.py
@@ -67,6 +67,7 @@ def __init__(
         self.subset_data = "none"
         self.subsampling_random_state = subsampling_random_state
         self.is_aggregated = False
+        self.is_subset_computed = False
 
         if self.subsample_n != "all":
             try:
@@ -133,6 +134,7 @@ def count_cells(self, compartment="cells", count_subset=False):
 
         if count_subset:
             assert self.is_aggregated, "Make sure to aggregate_profiles() first!"
+            assert self.is_subset_computed, "Make sure to get_subsample() first!"
             count_df = pd.crosstab(
                 self.subset_data.loc[:, self.strata[1]],
                 self.subset_data.loc[:, self.strata[0]],
@@ -194,6 +196,7 @@ def get_subsample(self, compartment="cells"):
             .apply(lambda x: self.subsample_profiles(x))
             .reset_index(drop=True)
         )
+        self.is_subset_computed = True
 
     def aggregate_compartment(self, compartment, compute_subsample=False):
         """