OmicsML · RemyLau · Nov 20, 2022 · Nov 20, 2022 · Nov 20, 2022
diff --git a/dance/utils/matrix.py b/dance/utils/matrix.py
@@ -0,0 +1,68 @@
+import numpy as np
+import torch
+
+from dance.typing import Literal
+
+NormMode = Literal["normalize", "standardize", "minmax", "l2"]  # modes supported by ``normalize``
+
+
+def normalize(mat, *, mode: NormMode = "normalize", axis: int = 0, eps: float = -1.0):
+    """Normalize a matrix along an axis and return the result as a new matrix.
+
+    Parameters
+    ----------
+    mat
+        Input matrix to be normalized, can be torch tensor or numpy array.
+    mode
+        Normalization mode. **normalize** means divide the values by the sum. **standardize** means center then rescale
+        by (population) standard deviation. **minmax** means rescale the values along the axis of choice between zero
+        and one. **l2** means divide the values by the L2 norm. Any other mode applies no shift and no rescaling.
+    axis
+        Axis along which the normalization will take place.
+    eps
+        Denominator correction factor to prevent divide by zero error. If positive, it is added to the denominator.
+        If set to -1, then replace the zero entries of the denominator with ones.
+
+    Raises
+    ------
+    TypeError
+        If ``mat`` is neither a torch tensor nor a numpy array.
+    ValueError
+        If ``eps`` is neither positive nor -1 (only checked when ``mode`` is one of the supported modes).
+
+    """
+    is_torch = isinstance(mat, torch.Tensor)
+    if not is_torch and not isinstance(mat, np.ndarray):
+        raise TypeError(f"Invalid type for input matrix: {type(mat)}")
+    opts = {"axis": axis, "keepdims": True}
+
+    # Compute the shift and the rescaling factor for the chosen mode
+    shift, denom = 0, None
+    if mode == "normalize":
+        denom = mat.sum(**opts)
+    elif mode == "standardize":
+        shift = -mat.mean(**opts)
+        denom = mat.std(**opts, unbiased=False) if is_torch else mat.std(**opts)
+    elif mode == "minmax":
+        # Torch reductions along an axis return (values, indices); numpy returns the values only
+        min_vals = mat.min(**opts)[0] if is_torch else mat.min(**opts)
+        max_vals = mat.max(**opts)[0] if is_torch else mat.max(**opts)
+        shift = -min_vals
+        denom = max_vals - min_vals
+    elif mode == "l2":
+        denom = (mat**2).sum(**opts)**0.5
+
+    # Correct denominator to prevent divide by zero error
+    if denom is None:
+        denom = 1
+    elif eps == -1:
+        denom[denom == 0] = 1
+    elif eps > 0:
+        # Out-of-place add: an in-place ``+=`` cannot cast an integer denominator to float and raises
+        denom = denom + eps
+    else:
+        raise ValueError(f"Invalid {eps=!r}. Must be positive or -1, the later set zero entries to one.")
+
+    norm_mat = (mat + shift) / denom
+
+    return norm_mat
diff --git a/setup.cfg b/setup.cfg
@@ -63,6 +63,7 @@ packages = find:
 [options.extras_require]
 dev =
     pre-commit==2.20.0
-    pytest==7.2.0
+    pytest-subtests==0.9.0
     pytest-xdist==3.0.2
+    pytest==7.2.0
     tox==3.27.1
diff --git a/tests/utils/test_matrix.py b/tests/utils/test_matrix.py
@@ -0,0 +1,26 @@
+import numpy as np
+
+from dance.utils import matrix
+
+
+def test_normalize(subtests):
+    """Check each mode of ``matrix.normalize`` along both axes of a small integer matrix."""
+    mat = np.array([[1, 1], [4, 4]])
+
+    with subtests.test("normalize"):
+        assert np.allclose(matrix.normalize(mat, mode="normalize", axis=0), [[0.2, 0.2], [0.8, 0.8]])
+        assert np.allclose(matrix.normalize(mat, mode="normalize", axis=1), [[0.5, 0.5], [0.5, 0.5]])
+
+    with subtests.test("standardize"):
+        assert np.allclose(matrix.normalize(mat, mode="standardize", axis=0), [[-1, -1], [1, 1]])
+        assert np.allclose(matrix.normalize(mat, mode="standardize", axis=1), [[0, 0], [0, 0]])
+
+    with subtests.test("minmax"):
+        assert np.allclose(matrix.normalize(mat, mode="minmax", axis=0), [[0, 0], [1, 1]])
+        assert np.allclose(matrix.normalize(mat, mode="minmax", axis=1), [[0, 0], [0, 0]])
+
+    with subtests.test("l2"):
+        # Compare with a tolerance: the reference uses sqrt while the implementation uses ``**0.5``
+        for axis in (0, 1):
+            l2_norm = np.sqrt((mat**2).sum(axis, keepdims=True))
+            assert np.allclose(matrix.normalize(mat, mode="l2", axis=axis), mat / l2_norm)