Skip to content

Commit

Permalink
Initial investigation into NumPy proxying in cudf.pandas (#16286)
Browse files Browse the repository at this point in the history
Part of #15397. Closes #14537. Creates `ProxyNDarray`, which inherits from `np.ndarray`.

Authors:
  - Matthew Murray (https://github.com/Matt711)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #16286
  • Loading branch information
Matt711 authored Aug 16, 2024
1 parent f955dd7 commit 1c63e1e
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 1 deletion.
3 changes: 3 additions & 0 deletions python/cudf/cudf/pandas/_wrappers/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
make_final_proxy_type,
make_intermediate_proxy_type,
)
from ..proxy_base import ProxyNDarrayBase
from .common import (
array_interface,
array_method,
Expand Down Expand Up @@ -111,12 +112,14 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor):
numpy.ndarray,
fast_to_slow=cupy.ndarray.get,
slow_to_fast=cupy.asarray,
bases=(ProxyNDarrayBase,),
additional_attributes={
"__array__": array_method,
# So that pa.array(wrapped-numpy-array) works
"__arrow_array__": arrow_array_method,
"__cuda_array_interface__": cuda_array_interface,
"__array_interface__": array_interface,
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__"),
# ndarrays are unhashable
"__hash__": None,
# iter(cupy-array) produces an iterable of zero-dim device
Expand Down
20 changes: 19 additions & 1 deletion python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..options import _env_get_bool
from ..testing import assert_eq
from .annotation import nvtx
from .proxy_base import ProxyNDarrayBase


def call_operator(fn, args, kwargs):
Expand Down Expand Up @@ -564,7 +565,11 @@ def _fsproxy_wrap(cls, value, func):
_FinalProxy subclasses can override this classmethod if they
need particular behaviour when wrapped up.
"""
proxy = object.__new__(cls)
base_class = _get_proxy_base_class(cls)
if base_class is object:
proxy = base_class.__new__(cls)
else:
proxy = base_class.__new__(cls, value)
proxy._fsproxy_wrapped = value
return proxy

Expand Down Expand Up @@ -1193,6 +1198,19 @@ def is_proxy_object(obj: Any) -> bool:
return False


def _get_proxy_base_class(cls):
    """Return the registered proxy base class that ``cls`` derives from.

    Parameters
    ----------
    cls : type
        The class whose method resolution order is inspected.

    Returns
    -------
    type
        The first class in ``cls.__mro__`` that is a member of
        ``PROXY_BASE_CLASSES``, or ``object`` when none of them is.
    """
    # Walk the MRO (most-derived first) and test membership in the registry,
    # rather than iterating the registry itself: set iteration order is
    # arbitrary, so with several registered bases in the MRO the answer
    # would otherwise depend on hashing order.
    for klass in cls.__mro__:
        if klass in PROXY_BASE_CLASSES:
            return klass
    return object


# Registry of base classes that proxy types may inherit from. It is consulted
# by _get_proxy_base_class, which looks for these classes in a type's MRO.
PROXY_BASE_CLASSES: set[type] = {
ProxyNDarrayBase,
}


NUMPY_TYPES: set[str] = set(np.sctypeDict.values())


Expand Down
23 changes: 23 additions & 0 deletions python/cudf/cudf/pandas/proxy_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import cupy as cp
import numpy as np


class ProxyNDarrayBase(np.ndarray):
    """``numpy.ndarray`` subclass created as a view over array data.

    Every instance carries a ``_fsproxy_wrapped`` attribute, which is
    copied from the source array whenever NumPy finalizes a new instance.
    """

    def __new__(cls, arr):
        """View ``arr`` as an instance of ``cls``.

        Parameters
        ----------
        arr : cupy.ndarray or numpy.ndarray
            A ``cupy.ndarray`` is first fetched with ``.get()``; a
            ``numpy.ndarray`` is used as is.

        Raises
        ------
        TypeError
            If ``arr`` is neither a ``cupy.ndarray`` nor a ``numpy.ndarray``.
        """
        if isinstance(arr, cp.ndarray):
            host_array = arr.get()
        elif isinstance(arr, np.ndarray):
            host_array = arr
        else:
            raise TypeError(
                "Unsupported array type. Must be numpy.ndarray or cupy.ndarray"
            )
        # Both supported inputs end up on the same path: view the host data
        # as this subclass.
        return np.asarray(host_array).view(cls)

    def __array_finalize__(self, obj):
        """Take ``_fsproxy_wrapped`` from ``obj``, or ``None`` if it has none."""
        wrapped = getattr(obj, "_fsproxy_wrapped", None)
        self._fsproxy_wrapped = wrapped
8 changes: 8 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1632,3 +1632,11 @@ def test_change_index_name(index):

assert s.index.name == name
assert df.index.name == name


def test_numpy_ndarray_isinstancecheck(series):
    """``.values`` of both series must pass ``isinstance(..., np.ndarray)``."""
    first, second = series
    first_values = first.values
    second_values = second.values
    for values in (first_values, second_values):
        assert isinstance(values, np.ndarray)

0 comments on commit 1c63e1e

Please sign in to comment.