From 8ade6dfac3f7aa091d46e33a94618d9c0638591e Mon Sep 17 00:00:00 2001
From: Ian Thomas <ianthomas23@gmail.com>
Date: Fri, 28 Jul 2023 15:06:34 +0100
Subject: [PATCH 1/2] Antialiasing support for _max_row_index and _min_row_index reductions

---
 datashader/compiler.py   | 26 ++++++++++++++++----------
 datashader/core.py       |  2 +-
 datashader/reductions.py | 36 +++++++++++++++++++++++++++++-------
 datashader/utils.py      | 13 +++++++++++++
 4 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/datashader/compiler.py b/datashader/compiler.py
index 8b46f3657..9b4a0737d 100644
--- a/datashader/compiler.py
+++ b/datashader/compiler.py
@@ -10,7 +10,7 @@
 from .antialias import AntialiasCombination
 from .reductions import SpecialColumn, by, category_codes, summary
 from .utils import (isnull, ngjit, parallel_fill, nanmax_in_place, nanmin_in_place, nansum_in_place,
-    nanfirst_in_place, nanlast_in_place,
+    nanfirst_in_place, nanlast_in_place, row_max_in_place, row_min_in_place
 )
 
 try:
@@ -146,16 +146,22 @@ def _get_antialias_stage_2_combine_func(combination: AntialiasCombination, zero:
         # The aggs to combine here are either 3D (ny, nx, ncat) if categorical is True or
         # 2D (ny, nx) if categorical is False. The same combination functions can be for both
         # as all elements are independent.
-        if combination == AntialiasCombination.MAX:
-            return nanmax_in_place
-        elif combination == AntialiasCombination.MIN:
-            return nanmin_in_place
-        elif combination == AntialiasCombination.FIRST:
-            return nanfirst_in_place
-        elif combination == AntialiasCombination.LAST:
-            return nanlast_in_place
+        if zero == -1:
+            if combination == AntialiasCombination.MAX:
+                return row_max_in_place
+            elif combination == AntialiasCombination.MIN:
+                return row_min_in_place
         else:
-            return nansum_in_place
+            if combination == AntialiasCombination.MAX:
+                return nanmax_in_place
+            elif combination == AntialiasCombination.MIN:
+                return nanmin_in_place
+            elif combination == AntialiasCombination.FIRST:
+                return nanfirst_in_place
+            elif combination == AntialiasCombination.LAST:
+                return nanlast_in_place
+            else:
+                return nansum_in_place
 
     raise NotImplementedError
 
diff --git a/datashader/core.py b/datashader/core.py
index af6264a84..ea6059c1e 100644
--- a/datashader/core.py
+++ b/datashader/core.py
@@ -440,7 +440,7 @@ def line(self, source, x=None, y=None, agg=None, axis=0, geometry=None,
 
             if not isinstance(non_cat_agg, (
                 rd.any, rd.count, rd.max, rd.min, rd.sum, rd.summary, rd._sum_zero,
-                rd.first, rd.last, rd.mean
+                rd.mean, rd._first_or_last, rd._max_or_min_row_index,
             )):
                 raise NotImplementedError(
                     f"{type(non_cat_agg)} reduction not implemented for antialiased lines")
diff --git a/datashader/reductions.py b/datashader/reductions.py
index cbb9c0529..30047e577 100644
--- a/datashader/reductions.py
+++ b/datashader/reductions.py
@@ -2056,13 +2056,6 @@ def out_dshape(self, in_dshape, antialias, cuda, partitioned):
     def uses_row_index(self, cuda, partitioned):
         return True
 
-    def _build_append(self, dshape, schema, cuda, antialias, self_intersect):
-        # Doesn't yet support antialiasing
-        if cuda:
-            return self._append_cuda
-        else:
-            return self._append
-
 
 class _max_row_index(_max_or_min_row_index):
     """Max reduction operating on row index.
@@ -2071,6 +2064,9 @@ class _max_row_index(_max_or_min_row_index):
     user code. It is primarily purpose is to support the use of ``last``
     reductions using dask and/or CUDA.
     """
+    def _antialias_stage_2(self, self_intersect, array_module) -> tuple[AntialiasStage2]:
+        return (AntialiasStage2(AntialiasCombination.MAX, -1),)  # -1 is the missing-data row index
+
     @staticmethod
     @ngjit
     def _append(x, y, agg, field):
@@ -2080,6 +2076,16 @@ def _append(x, y, agg, field):
             return 0
         return -1
 
+    @staticmethod
+    @ngjit
+    def _append_antialias(x, y, agg, field, aa_factor):
+        # field is int64 row index; aa_factor is accepted but ignored.
+        # Keep the larger index: return 0 if agg[y, x] was updated, otherwise -1.
+        if field > agg[y, x]:
+            agg[y, x] = field
+            return 0
+        return -1
+
     # GPU append functions
     @staticmethod
     @nb_cuda.jit(device=True)
@@ -2108,6 +2114,12 @@ class _min_row_index(_max_or_min_row_index):
     user code. It is primarily purpose is to support the use of ``first``
     reductions using dask and/or CUDA.
     """
+    def _antialias_requires_2_stages(self):
+        return True
+
+    def _antialias_stage_2(self, self_intersect, array_module) -> tuple[AntialiasStage2]:
+        return (AntialiasStage2(AntialiasCombination.MIN, -1),)  # -1 is the missing-data row index
+
     def uses_cuda_mutex(self):
         return True
 
@@ -2121,6 +2133,16 @@ def _append(x, y, agg, field):
             return 0
         return -1
 
+    @staticmethod
+    @ngjit
+    def _append_antialias(x, y, agg, field, aa_factor):
+        # field is int64 row index (skipped when -1); aa_factor is accepted but ignored.
+        # Overwrite a pixel holding -1 or a larger index: return 0 if stored, otherwise -1.
+        if field != -1 and (agg[y, x] == -1 or field < agg[y, x]):
+            agg[y, x] = field
+            return 0
+        return -1
+
     # GPU append functions
     @staticmethod
     @nb_cuda.jit(device=True)
diff --git a/datashader/utils.py b/datashader/utils.py
index 0cc38cb2a..3f7eaab4c 100644
--- a/datashader/utils.py
+++ b/datashader/utils.py
@@ -789,6 +789,19 @@ def row_min_in_place(ret, other):
             ret[i] = other[i]
 
 
+@ngjit
+def row_max_in_place(ret, other):
+    """Maximum of 2 arrays of row indexes.
+    Row indexes are integers from 0 upwards, missing data is -1.
+    The result is written through ``ret.ravel()``; there is no return value.
+    """
+    ret = ret.ravel()
+    other = other.ravel()
+    for i in range(len(ret)):
+        if other[i] > -1 and (ret[i] == -1 or other[i] > ret[i]):
+            ret[i] = other[i]
+
+
 @ngjit
 def _row_max_n_impl(ret_pixel, other_pixel):
     """Single pixel implementation of row_max_n_in_place.

From 483b8a97400abe3209c996547541bda5f60b8246 Mon Sep 17 00:00:00 2001
From: Ian Thomas <ianthomas23@gmail.com>
Date: Fri, 28 Jul 2023 16:07:38 +0100
Subject: [PATCH 2/2] Add tests

---
 datashader/tests/test_pandas.py | 83 +++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py
index 38b5c4bed..17d58be94 100644
--- a/datashader/tests/test_pandas.py
+++ b/datashader/tests/test_pandas.py
@@ -2338,6 +2338,17 @@ def nansum(arr0, arr1):
     ret[mask] = np.nan
     return ret
 
+def rowmax(arr0, arr1):
+    return np.maximum(arr0, arr1)  # plain max works: missing (-1) is below every valid index
+
+def rowmin(arr0, arr1):
+    """Element-wise min of two row-index arrays (-1 = missing); inputs are not modified."""
+    bigint = np.max([np.max(arr0), np.max(arr1)]) + 1  # larger than any valid index
+    # np.where copies, so the module-level expected arrays passed in stay intact.
+    ret = np.minimum(np.where(arr0 < 0, bigint, arr0), np.where(arr1 < 0, bigint, arr1))
+    ret[ret == bigint] = -1
+    return ret
+
 line_antialias_df = pd.DataFrame(dict(
     # Self-intersecting line.
     x0=np.asarray([0, 1, 1, 0]),
@@ -2386,6 +2397,58 @@ def nansum(arr0, arr1):
     [np.nan, np.nan,   np.nan,   np.nan,   np.nan,   np.nan,  np.nan,  np.nan,   np.nan,  np.nan,  np.nan],
     [np.nan, np.nan,   np.nan,   np.nan,   np.nan,   np.nan,  np.nan,  np.nan,   np.nan,  np.nan,  np.nan],
 ])
+line_antialias_sol_min_index_0 = np.array([
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1,  0,  0, -1, -1, -1, -1, -1,  2,  1, -1],
+    [-1,  0,  0,  0, -1, -1, -1,  2,  2,  1, -1],
+    [-1, -1,  0,  0,  0, -1,  2,  2,  2,  1, -1],
+    [-1, -1, -1,  0,  0,  0,  2,  2, -1,  1, -1],
+    [-1, -1, -1, -1,  0,  0,  0, -1, -1,  1, -1],
+    [-1, -1, -1,  2,  2,  0,  0,  0, -1,  1, -1],
+    [-1, -1,  2,  2,  2, -1,  0,  0,  0,  1, -1],
+    [-1,  2,  2,  2, -1, -1, -1,  0,  0,  0, -1],
+    [-1,  2,  2, -1, -1, -1, -1, -1,  0,  0, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+], dtype=np.int64)
+line_antialias_sol_max_index_0 = np.array([
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1,  0,  0, -1, -1, -1, -1, -1,  2,  2, -1],
+    [-1,  0,  0,  0, -1, -1, -1,  2,  2,  2, -1],
+    [-1, -1,  0,  0,  0, -1,  2,  2,  2,  1, -1],
+    [-1, -1, -1,  0,  0,  2,  2,  2, -1,  1, -1],
+    [-1, -1, -1, -1,  2,  2,  2, -1, -1,  1, -1],
+    [-1, -1, -1,  2,  2,  2,  0,  0, -1,  1, -1],
+    [-1, -1,  2,  2,  2, -1,  0,  0,  0,  1, -1],
+    [-1,  2,  2,  2, -1, -1, -1,  0,  0,  1, -1],
+    [-1,  2,  2, -1, -1, -1, -1, -1,  0,  1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+], dtype=np.int64)
+line_antialias_sol_min_index_1 = np.array([
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1,  0,  0,  0,  0,  1,  1,  1,  2,  2, -1],
+    [-1,  0,  0,  0,  0,  1,  1,  1,  2,  2, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+], dtype=np.int64)
+line_antialias_sol_max_index_1 = np.array([
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1,  0,  0,  1,  1,  1,  2,  2,  2,  2, -1],
+    [-1,  0,  0,  0,  1,  1,  2,  2,  2,  2, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+    [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+], dtype=np.int64)
 
 def test_line_antialias():
     x_range = y_range = (-0.1875, 1.1875)
@@ -2425,6 +2488,12 @@ def test_line_antialias():
     sol = np.where(line_antialias_sol_0 > 0, 3.0, np.nan)
     assert_eq_ndarray(agg.data, sol, close=True)
 
+    agg = cvs.line(agg=ds._min_row_index(), **kwargs)
+    assert_eq_ndarray(agg.data, line_antialias_sol_min_index_0)
+
+    agg = cvs.line(agg=ds._max_row_index(), **kwargs)
+    assert_eq_ndarray(agg.data, line_antialias_sol_max_index_0)
+
     # Second line only, doesn't self-intersect
     kwargs = dict(source=line_antialias_df, x="x1", y="y1", line_width=1)
     agg = cvs.line(agg=ds.any(), **kwargs)
@@ -2458,6 +2527,12 @@ def test_line_antialias():
     sol_mean = np.where(line_antialias_sol_1 > 0, 3.0, np.nan)
     assert_eq_ndarray(agg.data, sol_mean, close=True)
 
+    agg = cvs.line(agg=ds._min_row_index(), **kwargs)
+    assert_eq_ndarray(agg.data, line_antialias_sol_min_index_1)
+
+    agg = cvs.line(agg=ds._max_row_index(), **kwargs)
+    assert_eq_ndarray(agg.data, line_antialias_sol_max_index_1)
+
     # Both lines.
     kwargs = dict(source=line_antialias_df, x=["x0", "x1"], y=["y0", "y1"], line_width=1)
     agg = cvs.line(agg=ds.any(), **kwargs)
@@ -2497,6 +2572,14 @@ def test_line_antialias():
     sol_mean = np.where(sol_count>0, 3.0, np.nan)
     assert_eq_ndarray(agg.data, sol_mean, close=True)
 
+    agg = cvs.line(agg=ds._min_row_index(), **kwargs)
+    sol_min_row = rowmin(line_antialias_sol_min_index_0, line_antialias_sol_min_index_1)
+    assert_eq_ndarray(agg.data, sol_min_row)
+
+    agg = cvs.line(agg=ds._max_row_index(), **kwargs)
+    sol_max_row = rowmax(line_antialias_sol_max_index_0, line_antialias_sol_max_index_1)
+    assert_eq_ndarray(agg.data, sol_max_row)
+
     assert_eq_ndarray(agg.x_range, x_range, close=True)
     assert_eq_ndarray(agg.y_range, y_range, close=True)