cytomining · gwaybio · Mar 6, 2020 · Mar 4, 2020 · Mar 4, 2020 · Mar 4, 2020
diff --git a/pycytominer/cyto_utils/transform.py b/pycytominer/cyto_utils/transform.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pandas as pd
 from scipy.linalg import eigh
+from scipy.stats import median_absolute_deviation
 from sklearn.base import BaseEstimator, TransformerMixin
 
 
@@ -52,3 +53,35 @@ def transform(self, X, y=None):
         Whiten an input matrix a given population dataframe
         """
         return np.dot(X - self.mu, self.W)
+
+
+class RobustMAD(BaseEstimator, TransformerMixin):
+    """
+    Class to perform a "Robust" normalization, per feature (column), with respect
+    to the median and mad; a mad of zero is not guarded and yields inf or nan:
+        scaled = (x - median) / mad
+    """
+
+    def __init__(self):
+        pass
+
+    def fit(self, X, y=None):
+        """
+        Compute the median and mad to be used for later scaling; returns self.
+
+        Arguments:
+        X - pandas dataframe to fit RobustMAD transform (y is accepted but unused)
+        """
+        # Get the median and mad of each feature (column); transform reuses both
+        self.median = X.median()
+        self.mad = pd.Series(median_absolute_deviation(X), index=self.median.index)
+        return self
+
+    def transform(self, X, copy=None):
+        """
+        Apply the RobustMAD calculation using the median and mad stored by fit
+
+        Arguments:
+        X - pandas dataframe to apply RobustMAD transform (copy is unused)
+        """
+        return (X - self.median) / self.mad
diff --git a/pycytominer/normalize.py b/pycytominer/normalize.py
@@ -6,7 +6,7 @@
 from sklearn.preprocessing import StandardScaler, RobustScaler
 
 from pycytominer.cyto_utils import output, infer_cp_features
-from pycytominer.cyto_utils.transform import Whiten
+from pycytominer.cyto_utils.transform import Whiten, RobustMAD
 
 
 def normalize(
@@ -59,13 +59,15 @@ def normalize(
     # Define which scaler to use
     method = method.lower()
 
-    avail_methods = ["standardize", "robustize", "whiten"]
+    avail_methods = ["standardize", "robustize", "mad_robustize", "whiten"]
     assert method in avail_methods, "operation must be one {}".format(avail_methods)
 
     if method == "standardize":
         scaler = StandardScaler()
     elif method == "robustize":
         scaler = RobustScaler()
+    elif method == "mad_robustize":
+        scaler = RobustMAD()
     elif method == "whiten":
         scaler = Whiten(center=whiten_center)
 

diff --git a/pycytominer/tests/test_cyto_utils/test_transform.py b/pycytominer/tests/test_cyto_utils/test_transform.py
@@ -2,7 +2,8 @@
 import random
 import numpy as np
 import pandas as pd
-from pycytominer.cyto_utils.transform import Whiten
+from scipy.stats import median_absolute_deviation
+from pycytominer.cyto_utils.transform import Whiten, RobustMAD
 
 random.seed(123)
 
@@ -45,3 +46,24 @@ def test_whiten_no_center():
     expected_result = data_df.shape[1]
 
     assert int(result) == expected_result
+
+
+def test_robust_mad():
+    """
+    Testing the RobustMAD class; floats are compared with np.isclose (not int())
+    """
+    scaler = RobustMAD()
+    scaler = scaler.fit(data_df)
+    transform_df = scaler.transform(data_df)
+
+    # The transformed data is expected to have a median equal to zero
+    result = transform_df.median().sum()
+    expected_result = 0
+
+    assert np.isclose(result, expected_result)
+
+    # Each column's mad should be one, so the sum equals the number of columns
+    result = median_absolute_deviation(transform_df).sum()
+    expected_result = data_df.shape[1]
+
+    assert np.isclose(result, expected_result)
diff --git a/pycytominer/tests/test_normalize.py b/pycytominer/tests/test_normalize.py
@@ -220,6 +220,82 @@ def test_normalize_robustize_ctrlsamples():
     pd.testing.assert_frame_equal(normalize_result, expected_result)
 
 
+def test_normalize_robustize_mad_allsamples():
+    """
+    Testing normalize pycytominer function
+    method = "mad_robustize"
+    meta_features = "infer"
+    samples="all"
+    """
+    normalize_result = normalize(
+        profiles=data_df.copy(),
+        features=["x", "y", "z", "zz"],
+        meta_features="infer",
+        samples="all",
+        method="mad_robustize",
+    ).round(1)
+
+    expected_result = pd.DataFrame(
+        {
+            "Metadata_plate": ["a", "a", "a", "a", "b", "b", "b", "b"],
+            "Metadata_treatment": [
+                "drug",
+                "drug",
+                "control",
+                "control",
+                "drug",
+                "drug",
+                "control",
+                "control",
+            ],
+            "x": [-1.1, -0.7, 2, -0.7, 0.7, 0.7, 0.7, -1.1],
+            "y": [-0.5, -1.2, 0.8, -0.2, 0.2, 1.5, 0.5, -1.2],
+            "z": [-0.8, 1.5, -0.5, 0.5, 0.8, 6.2, -0.5, -0.5],
+            "zz": [0.3, 2.9, -0.7, -0.3, 1.6, 7.1, -0.6, -0.6],
+        }
+    ).reset_index(drop=True)
+
+    pd.testing.assert_frame_equal(normalize_result, expected_result)
+
+
+def test_normalize_robustize_mad_ctrlsamples():
+    """
+    Testing normalize pycytominer function
+    method = "mad_robustize"
+    meta_features = "infer"
+    samples="Metadata_treatment == 'control'"
+    """
+    normalize_result = normalize(
+        profiles=data_df.copy(),
+        features=["x", "y", "z", "zz"],
+        meta_features="infer",
+        samples="Metadata_treatment == 'control'",
+        method="mad_robustize",
+    ).round(1)
+
+    expected_result = pd.DataFrame(
+        {
+            "Metadata_plate": ["a", "a", "a", "a", "b", "b", "b", "b"],
+            "Metadata_treatment": [
+                "drug",
+                "drug",
+                "control",
+                "control",
+                "drug",
+                "drug",
+                "control",
+                "control",
+            ],
+            "x": [-0.8, -0.5, 1.5, -0.5, 0.5, 0.5, 0.5, -0.8],
+            "y": [-0.9, -1.8, 0.9, -0.4, 0.0, 1.8, 0.4, -1.8],
+            "z": [-np.inf, np.inf, np.nan, np.inf, np.inf, np.inf, np.nan, np.nan],
+            "zz": [16.2, 59.4, -1.3, 5.4, 37.8, 132.2, 0.0, 0.0],
+        }
+    ).reset_index(drop=True)
+
+    pd.testing.assert_frame_equal(normalize_result, expected_result)
+
+
 def test_normalize_standardize_allsamples_fromfile():
     """
     Testing normalize pycytominer function