Skip to content

Commit

Permalink
perf: make the ak.Array.mask weakly dependent on the array itself (#3347)
Browse files Browse the repository at this point in the history

* make the ak.Array.mask weakly dependent on the array itself

* add test

* skip test for pypy

* fix more cases where the array doesn't live long enough to create a mask with ak.Array.mask

* style: pre-commit fixes

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
pfackeldey and pre-commit-ci[bot] authored Dec 18, 2024
1 parent 27faa82 commit f781231
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 76 deletions.
2 changes: 1 addition & 1 deletion docs/user-guide/how-to-create-missing.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,6 @@ def faster_example():
data, mask = faster_example()
array = ak.Array(data).mask[mask]
array = ak.mask(data, mask)
array
```
13 changes: 10 additions & 3 deletions src/awkward/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import keyword
import pickle
import re
import weakref
from collections.abc import Iterable, Mapping, Sequence, Sized

from awkward_cpp.lib import _ext
Expand Down Expand Up @@ -488,13 +489,19 @@ def named_axis(self) -> AxisMapping:

class Mask:
    """Indexable helper that masks an array it only weakly references.

    The target array is stored as a ``weakref.ref``, so this helper does not
    by itself keep the array alive. Indexing the helper (``helper[where]``)
    forwards to ``ak.operations.mask`` with the still-living array.
    """

    def __init__(self, array):
        """Remember ``array`` through a weak reference.

        Args:
            array: The object to be masked later; it must support weak
                references.
        """
        self._array = weakref.ref(array)

    def __getitem__(self, where):
        """Return ``ak.operations.mask(array, where, valid_when=True)``.

        Args:
            where: The mask selector, forwarded unchanged.

        Raises:
            ValueError: If the weakly referenced array has already been
                garbage-collected, e.g. when the mask helper was taken from
                a temporary array that no longer exists.
        """
        # Dereference once and hold a strong reference for the rest of the
        # call so the array cannot disappear halfway through.
        array = self._array()
        if array is None:
            msg = (
                "The array to mask was deleted before it could be masked. "
                "If you want to construct this mask, you must either keep the array alive "
                "or use 'ak.mask' explicitly."
            )
            raise ValueError(msg)

        # Pass the dereferenced array (not the weakref object) both to the
        # error context and to the masking operation.
        with ak._errors.OperationErrorContext(
            "ak.Array.mask", args=[array, where], kwargs={}
        ):
            return ak.operations.mask(array, where, valid_when=True)

@property
def mask(self):
Expand Down
3 changes: 2 additions & 1 deletion tests-cuda/test_2922a_new_cuda_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,8 @@ def test_2064_fill_none_record_axis_last():


def test_2064_fill_none_record_option_outside_record():
record = ak.zip({"x": [1, 4], "y": [2, 3]}).mask[[True, False]]
record = ak.zip({"x": [1, 4], "y": [2, 3]})
record = record.mask[[True, False]]

cuda_record = ak.to_backend(record, "cuda")

Expand Down
80 changes: 36 additions & 44 deletions tests-cuda/test_3136_cuda_reducers.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,11 @@ def test_2020_reduce_axis_none_sum():
ak.sum(array, axis=None, keepdims=True),
ak.to_regular(ak.Array([[63.0]], backend="cuda")),
)

arr = ak.Array([[63.0]], backend="cuda")
assert ak.almost_equal(
ak.sum(array, axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[63.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")]
),
ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]),
)
assert ak.sum(array[2], axis=None, mask_identity=True) is None
del array
Expand All @@ -291,13 +291,11 @@ def test_2020_reduce_axis_none_prod():
ak.prod(array[1:], axis=None, keepdims=True),
ak.to_regular(ak.Array([[4838400.0]], backend="cuda")),
)

arr = ak.Array([[4838400.0]], backend="cuda")
assert ak.almost_equal(
ak.prod(array[1:], axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[4838400.0]], backend="cuda").mask[
ak.Array([[True]], backend="cuda")
]
),
ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]),
)
assert ak.prod(array[2], axis=None, mask_identity=True) is None
del array
Expand All @@ -316,19 +314,17 @@ def test_2020_reduce_axis_none_min():
ak.min(array, axis=None, keepdims=True, initial=-100.0, mask_identity=False),
ak.to_regular(ak.Array([[-100.0]], backend="cuda")),
)

arr = ak.Array([[0.0]], backend="cuda")
assert ak.almost_equal(
ak.min(array, axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[0.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")]
),
ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]),
)

arr = ak.Array(ak.Array([[np.inf]], backend="cuda"))
assert ak.almost_equal(
ak.min(array[-1:], axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array(ak.Array([[np.inf]], backend="cuda")).mask[
ak.Array([[False]], backend="cuda")
]
),
ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]),
)
assert ak.min(array[2], axis=None, mask_identity=True) is None
del array
Expand All @@ -347,19 +343,17 @@ def test_2020_reduce_axis_none_max():
ak.max(array, axis=None, keepdims=True, initial=100.0, mask_identity=False),
ak.to_regular(ak.Array([[100.0]], backend="cuda")),
)

arr = ak.Array([[10.0]], backend="cuda")
assert ak.almost_equal(
ak.max(array, axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[10.0]], backend="cuda").mask[ak.Array([[True]], backend="cuda")]
),
ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]),
)

arr = ak.Array(ak.Array([[np.inf]], backend="cuda"))
assert ak.almost_equal(
ak.max(array[-1:], axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array(ak.Array([[np.inf]], backend="cuda")).mask[
ak.Array([[False]], backend="cuda")
]
),
ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]),
)
assert ak.max(array[2], axis=None, mask_identity=True) is None
del array
Expand All @@ -374,17 +368,17 @@ def test_2020_reduce_axis_none_count():
ak.count(array, axis=None, keepdims=True, mask_identity=False),
ak.to_regular(ak.Array([[12]], backend="cuda")),
)

arr = ak.Array([[12]], backend="cuda")
assert ak.almost_equal(
ak.count(array, axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[12]], backend="cuda").mask[ak.Array([[True]], backend="cuda")]
),
ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]),
)

arr = ak.Array([[0]], backend="cuda")
assert ak.almost_equal(
ak.count(array[-1:], axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")]
),
ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]),
)
assert ak.count(array[2], axis=None, mask_identity=True) is None
assert ak.count(array[2], axis=None, mask_identity=False) == 0
Expand All @@ -400,17 +394,17 @@ def test_2020_reduce_axis_none_count_nonzero():
ak.count_nonzero(array, axis=None, keepdims=True, mask_identity=False),
ak.to_regular(ak.Array([[11]], backend="cuda")),
)

arr = ak.Array([[11]], backend="cuda")
assert ak.almost_equal(
ak.count_nonzero(array, axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[11]], backend="cuda").mask[ak.Array([[True]], backend="cuda")]
),
ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]),
)

arr = ak.Array([[0]], backend="cuda")
assert ak.almost_equal(
ak.count_nonzero(array[-1:], axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")]
),
ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")]),
)
assert ak.count_nonzero(array[2], axis=None, mask_identity=True) is None
assert ak.count_nonzero(array[2], axis=None, mask_identity=False) == 0
Expand All @@ -422,9 +416,9 @@ def test_2020_reduce_axis_none_std_no_mask_axis_none():
[[0, 2, 3.0], [4, 5, 6, 7, 8], [], [9, 8, None], [10, 1], []], backend="cuda"
)
out1 = ak.std(array[-1:], axis=None, keepdims=True, mask_identity=True)
out2 = ak.to_regular(
ak.Array([[0.0]], backend="cuda").mask[ak.Array([[False]], backend="cuda")]
)

arr = ak.Array([[0.0]], backend="cuda")
out2 = ak.to_regular(arr.mask[ak.Array([[False]], backend="cuda")])
assert ak.almost_equal(out1, out2)

out3 = ak.std(array[2], axis=None, mask_identity=True)
Expand All @@ -442,13 +436,11 @@ def test_2020_reduce_axis_none_std():
ak.std(array, axis=None, keepdims=True, mask_identity=False),
ak.to_regular([[3.139134700306227]]),
)

arr = ak.Array([[3.139134700306227]], backend="cuda")
cpt.assert_allclose(
ak.std(array, axis=None, keepdims=True, mask_identity=True),
ak.to_regular(
ak.Array([[3.139134700306227]], backend="cuda").mask[
ak.Array([[True]], backend="cuda")
]
),
ak.to_regular(arr.mask[ak.Array([[True]], backend="cuda")]),
)
assert np.isnan(ak.std(array[2], axis=None, mask_identity=False))
del array
Loading

0 comments on commit f781231

Please sign in to comment.