diff --git a/docs/cudf/source/cudf_pandas/faq.md b/docs/cudf/source/cudf_pandas/faq.md index dde7afb1360..55976740105 100644 --- a/docs/cudf/source/cudf_pandas/faq.md +++ b/docs/cudf/source/cudf_pandas/faq.md @@ -151,15 +151,3 @@ for testing or benchmarking purposes. To do so, set the ```bash CUDF_PANDAS_FALLBACK_MODE=1 python -m cudf.pandas some_script.py ``` - -## Slow tab completion in IPython? - -You may experience slow tab completion when inspecting the -methods/attributes of large dataframes. We expect this issue to be -resolved in an upcoming release. In the mean time, you may execute the -following command in IPython before loading `cudf.pandas` to work -around the issue: - -``` -%config IPCompleter.jedi_compute_type_timeout=0 -``` diff --git a/python/cudf/cudf/pandas/_wrappers/common.py b/python/cudf/cudf/pandas/_wrappers/common.py index 1669882631b..468c5687c15 100644 --- a/python/cudf/cudf/pandas/_wrappers/common.py +++ b/python/cudf/cudf/pandas/_wrappers/common.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -17,9 +17,9 @@ def array_method(self: _FastSlowProxy, *args, **kwargs): def array_function_method(self, func, types, args, kwargs): try: - return _FastSlowAttribute("__array_function__").__get__(self)( - func, types, args, kwargs - ) + return _FastSlowAttribute("__array_function__").__get__( + self, type(self) + )(func, types, args, kwargs) except Exception: # if something went wrong with __array_function__ we # attempt to call the function directly on the slow diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 9955550ef90..94298872213 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -10,6 +10,7 @@ import numpy.core.multiarray from ..fast_slow_proxy import ( + _FastSlowAttribute, make_final_proxy_type, make_intermediate_proxy_type, ) @@ -122,6 +123,7 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): "__iter__": custom_iter, # Special wrapping to handle scalar values "_fsproxy_wrap": classmethod(wrap_ndarray), + "base": _FastSlowAttribute("base", private=True), }, ) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index de92cce8ebb..29aaaac245d 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -107,14 +107,16 @@ class _AccessorAttr: """ def __init__(self, typ): - self.__typ = typ + self._typ = typ + + def __set_name__(self, owner, name): + self._name = name def __get__(self, obj, cls=None): if obj is None: - return self.__typ + return self._typ else: - # allow __getattr__ to handle this - raise AttributeError() + return _FastSlowAttribute(self._name).__get__(obj, type(obj)) def Timestamp_Timedelta__new__(cls, *args, **kwargs): @@ -214,6 +216,7 @@ def _DataFrame__dir__(self): "__dir__": _DataFrame__dir__, "_constructor": _FastSlowAttribute("_constructor"), "_constructor_sliced": _FastSlowAttribute("_constructor_sliced"), + "_accessors": set(), }, ) @@ -236,6 +239,7 @@ def _DataFrame__dir__(self): "cat": _AccessorAttr(_CategoricalAccessor), "_constructor": _FastSlowAttribute("_constructor"), "_constructor_expanddim": _FastSlowAttribute("_constructor_expanddim"), + "_accessors": set(), }, ) @@ -273,6 +277,9 @@ def Index__new__(cls, *args, **kwargs): "__new__": Index__new__, "_constructor": _FastSlowAttribute("_constructor"), "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), + "_accessors": set(), + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), }, ) @@ -337,7 +344,11 @@ def Index__new__(cls, *args, **kwargs): fast_to_slow=lambda fast: fast.to_pandas(), slow_to_fast=cudf.from_pandas, bases=(Index,), - additional_attributes={"__init__": _DELETE}, + additional_attributes={ + "__init__": _DELETE, + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + }, ) DatetimeArray = make_final_proxy_type( @@ -346,6 +357,10 @@ def Index__new__(cls, *args, **kwargs): pd.arrays.DatetimeArray, fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), + additional_attributes={ + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + }, ) DatetimeTZDtype = make_final_proxy_type( @@ -364,7 +379,11 @@ def Index__new__(cls, *args, **kwargs): fast_to_slow=lambda fast: fast.to_pandas(), slow_to_fast=cudf.from_pandas, bases=(Index,), - additional_attributes={"__init__": _DELETE}, + additional_attributes={ + "__init__": _DELETE, + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + }, ) NumpyExtensionArray = make_final_proxy_type( @@ -385,6 +404,10 @@ def Index__new__(cls, *args, **kwargs): pd.arrays.TimedeltaArray, fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), + additional_attributes={ + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + }, ) PeriodIndex = make_final_proxy_type( @@ -394,7 +417,11 @@ def Index__new__(cls, *args, **kwargs): fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), bases=(Index,), - additional_attributes={"__init__": _DELETE}, + additional_attributes={ + "__init__": _DELETE, + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + }, ) PeriodArray = make_final_proxy_type( @@ -403,6 +430,11 @@ def Index__new__(cls, *args, **kwargs): pd.arrays.PeriodArray, fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), + additional_attributes={ + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), + }, ) PeriodDtype = make_final_proxy_type( @@ -464,6 +496,10 @@ def Index__new__(cls, *args, **kwargs): pd.arrays.StringArray, fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), + additional_attributes={ + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + }, ) StringDtype = make_final_proxy_type( @@ -472,7 +508,10 @@ def Index__new__(cls, *args, **kwargs): pd.StringDtype, fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), - additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, + additional_attributes={ + "__hash__": _FastSlowAttribute("__hash__"), + "storage": _FastSlowAttribute("storage"), + }, ) BooleanArray = make_final_proxy_type( @@ -482,7 +521,9 @@ def Index__new__(cls, *args, **kwargs): fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), additional_attributes={ - "__array_ufunc__": _FastSlowAttribute("__array_ufunc__") + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), }, ) @@ -502,7 +543,9 @@ def Index__new__(cls, *args, **kwargs): fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), additional_attributes={ - "__array_ufunc__": _FastSlowAttribute("__array_ufunc__") + "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), }, ) @@ -586,7 +629,11 @@ def Index__new__(cls, *args, **kwargs): fast_to_slow=lambda fast: fast.to_pandas(), slow_to_fast=cudf.from_pandas, bases=(Index,), - additional_attributes={"__init__": _DELETE}, + additional_attributes={ + "__init__": _DELETE, + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + }, ) IntervalArray = make_final_proxy_type( @@ -595,6 +642,10 @@ def Index__new__(cls, *args, **kwargs): pd.arrays.IntervalArray, fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), + additional_attributes={ + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), + }, ) IntervalDtype = make_final_proxy_type( @@ -622,7 +673,9 @@ def Index__new__(cls, *args, **kwargs): fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), additional_attributes={ - "__array_ufunc__": _FastSlowAttribute("__array_ufunc__") + "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), + "_data": _FastSlowAttribute("_data", private=True), + "_mask": _FastSlowAttribute("_mask", private=True), }, ) @@ -798,6 +851,14 @@ def Index__new__(cls, *args, **kwargs): pd_Styler, fast_to_slow=_Unusable(), slow_to_fast=_Unusable(), + additional_attributes={ + "css": _FastSlowAttribute("css"), + "ctx": _FastSlowAttribute("ctx"), + "index": _FastSlowAttribute("ctx"), + "data": _FastSlowAttribute("data"), + "_display_funcs": _FastSlowAttribute("_display_funcs"), + "table_styles": _FastSlowAttribute("table_styles"), + }, ) except ImportError: # Styler requires Jinja to be installed @@ -813,7 +874,7 @@ def _get_eval_locals_and_globals(level, local_dict=None, global_dict=None): return local_dict, global_dict -@register_proxy_func(pd.eval) +@register_proxy_func(pd.core.computation.eval.eval) @nvtx.annotate( "CUDF_PANDAS_EVAL", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], @@ -843,6 +904,24 @@ def _eval( ) +_orig_df_eval_method = DataFrame.eval + + +@register_proxy_func(pd.core.accessor.register_dataframe_accessor) +def _register_dataframe_accessor(name): + return pd.core.accessor._register_accessor(name, DataFrame) + + +@register_proxy_func(pd.core.accessor.register_series_accessor) +def _register_series_accessor(name): + return pd.core.accessor._register_accessor(name, Series) + + +@register_proxy_func(pd.core.accessor.register_index_accessor) +def _register_index_accessor(name): + return pd.core.accessor._register_accessor(name, Index) + + @nvtx.annotate( "CUDF_PANDAS_DATAFRAME_EVAL", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], @@ -853,11 +932,14 @@ def _df_eval_method(self, *args, local_dict=None, global_dict=None, **kwargs): local_dict, global_dict = _get_eval_locals_and_globals( level, local_dict, global_dict ) - return super(type(self), self).__getattr__("eval")( - *args, local_dict=local_dict, global_dict=global_dict, **kwargs + return _orig_df_eval_method( + self, *args, local_dict=local_dict, global_dict=global_dict, **kwargs ) +_orig_query_eval_method = DataFrame.query + + @nvtx.annotate( "CUDF_PANDAS_DATAFRAME_QUERY", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], @@ -870,8 +952,8 @@ def _df_query_method(self, *args, local_dict=None, global_dict=None, **kwargs): local_dict, global_dict = _get_eval_locals_and_globals( level, local_dict, global_dict ) - return super(type(self), self).__getattr__("query")( - *args, local_dict=local_dict, global_dict=global_dict, **kwargs + return _orig_query_eval_method( + self, *args, local_dict=local_dict, global_dict=global_dict, **kwargs ) @@ -1277,6 +1359,7 @@ def holiday_calendar_factory_wrapper(*args, **kwargs): additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, ) + MonthBegin = make_final_proxy_type( "MonthBegin", _Unusable, diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index e5c86d2318e..94caec1ce6c 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -83,6 +83,9 @@ def __getattribute__(self, name: str) -> Any: return super().__getattribute__(name) raise TypeError("Unusable type. Falling back to the slow object") + def __repr__(self) -> str: + raise AttributeError("Unusable type. Falling back to the slow object") + class _PickleConstructor: """A pickleable object to support construction in __reduce__. @@ -231,6 +234,13 @@ def _fsproxy_state(self) -> _State: elif v is not _DELETE: cls_dict[k] = v + for slow_name in dir(slow_type): + if slow_name in cls_dict or slow_name.startswith("__"): + continue + else: + cls_dict[slow_name] = _FastSlowAttribute( + slow_name, private=slow_name.startswith("_") + ) if meta_class is None: meta_class = _FastSlowProxyMeta else: @@ -329,11 +339,26 @@ def _fsproxy_fast_to_slow(self): "_fsproxy_fast_to_slow": _fsproxy_fast_to_slow, "_fsproxy_state": _fsproxy_state, } - for method in _SPECIAL_METHODS: if getattr(slow_type, method, False): cls_dict[method] = _FastSlowAttribute(method) + for slow_name in dir(slow_type): + if slow_name in cls_dict or slow_name.startswith("__"): + continue + else: + cls_dict[slow_name] = _FastSlowAttribute( + slow_name, private=slow_name.startswith("_") + ) + + for slow_name in getattr(slow_type, "_attributes", []): + if slow_name in cls_dict: + continue + else: + cls_dict[slow_name] = _FastSlowAttribute( + slow_name, private=slow_name.startswith("_") + ) + cls = types.new_class( name, (_IntermediateProxy,), @@ -411,62 +436,16 @@ def _raise_attribute_error(obj, name): raise AttributeError(f"'{obj}' object has no attribute '{name}'") -class _FastSlowAttribute: - """ - A descriptor type used to define attributes of fast-slow proxies. - """ - - def __init__(self, name: str): - self._name = name - - def __get__(self, obj, owner=None) -> Any: - if obj is None: - # class attribute - obj = owner - - if not ( - isinstance(obj, _FastSlowProxy) - or issubclass(type(obj), _FastSlowProxyMeta) - ): - # we only want to look up attributes on the underlying - # fast/slow objects for instances of _FastSlowProxy or - # subtypes of _FastSlowProxyMeta: - _raise_attribute_error(owner if owner else obj, self._name) - - result, _ = _fast_slow_function_call(getattr, obj, self._name) - - if isinstance(result, functools.cached_property): - # TODO: temporary workaround until dask is able - # to correctly inspect cached_property objects. - # GH: 264 - result = property(result.func) - - if isinstance(result, (_MethodProxy, property)): - from .module_accelerator import disable_module_accelerator - - type_ = owner if owner else type(obj) - slow_result_type = getattr(type_._fsproxy_slow, self._name) - with disable_module_accelerator(): - result.__doc__ = inspect.getdoc( # type: ignore - slow_result_type - ) - - if isinstance(result, _MethodProxy): - # Note that this will produce the wrong result for bound - # methods because dir for the method won't be the same as for - # the pure unbound function, but the alternative is - # materializing the slow object when we don't really want to. - result._fsproxy_slow_dir = dir(slow_result_type) # type: ignore - - return result - - class _FastSlowProxyMeta(type): """ Metaclass used to dynamically find class attributes and classmethods of fast-slow proxy types. """ + _fsproxy_slow_dir: list + _fsproxy_slow_type: type + _fsproxy_fast_type: type + @property def _fsproxy_slow(self) -> type: return self._fsproxy_slow_type @@ -483,15 +462,6 @@ def __dir__(self): except AttributeError: return type.__dir__(self) - def __getattr__(self, name: str) -> Any: - if name.startswith("_fsproxy") or name.startswith("__"): - # an AttributeError was raised when trying to evaluate - # an internal attribute, we just need to propagate this - _raise_attribute_error(self.__class__.__name__, name) - - attr = _FastSlowAttribute(name) - return attr.__get__(None, owner=self) - def __subclasscheck__(self, __subclass: type) -> bool: if super().__subclasscheck__(__subclass): return True @@ -565,56 +535,13 @@ def __dir__(self): except AttributeError: return object.__dir__(self) - def __getattr__(self, name: str) -> Any: - if name.startswith("_fsproxy"): - # an AttributeError was raised when trying to evaluate - # an internal attribute, we just need to propagate this - _raise_attribute_error(self.__class__.__name__, name) - if name in { - "_ipython_canary_method_should_not_exist_", - "_ipython_display_", - "_repr_mimebundle_", - # Workaround for https://github.com/numpy/numpy/issues/5350 - # see GH:216 for details - "__array_struct__", - }: - # IPython always looks for these names in its display - # logic. See #GH:70 and #GH:172 for more details but the - # gist is that not raising an AttributeError immediately - # results in slow display in IPython (since the fast - # object will be copied to the slow one to look for - # attributes there which then also won't exist). - # This is somewhat delicate to the order in which IPython - # implements special display fallbacks. - _raise_attribute_error(self.__class__.__name__, name) - if name.startswith("_"): - # private attributes always come from `._fsproxy_slow`: - obj = getattr(self._fsproxy_slow, name) - if name.startswith("__array"): - # TODO: numpy methods raise when given proxy ndarray objects - # https://numpy.org/doc/stable/reference/arrays.classes.html#special-attributes-and-methods # noqa:E501 - return obj - - if not _is_function_or_method(obj): - return _maybe_wrap_result( - obj, getattr, self._fsproxy_slow, name - ) - - @functools.wraps(obj) - def _wrapped_private_slow(*args, **kwargs): - slow_args, slow_kwargs = _slow_arg(args), _slow_arg(kwargs) - result = obj(*slow_args, **slow_kwargs) - return _maybe_wrap_result(result, obj, *args, **kwargs) - - return _wrapped_private_slow - attr = _FastSlowAttribute(name) - return attr.__get__(self) - def __setattr__(self, name, value): if name.startswith("_"): object.__setattr__(self, name, value) return - return _FastSlowAttribute("__setattr__").__get__(self)(name, value) + return _FastSlowAttribute("__setattr__").__get__(self, type(self))( + name, value + ) class _FinalProxy(_FastSlowProxy): @@ -790,17 +717,162 @@ class _FunctionProxy(_CallableProxyMixin): __name__: str - def __init__(self, fast: Callable | _Unusable, slow: Callable): + def __init__( + self, + fast: Callable | _Unusable, + slow: Callable, + *, + assigned=None, + updated=None, + ): self._fsproxy_fast = fast self._fsproxy_slow = slow - functools.update_wrapper(self, slow) + if assigned is None: + assigned = functools.WRAPPER_ASSIGNMENTS + if updated is None: + updated = functools.WRAPPER_UPDATES + functools.update_wrapper( + self, + slow, + assigned=assigned, + updated=updated, + ) + def __reduce__(self): + """ + In conjunction with `__proxy_setstate__`, this effectively enables + proxy types to be pickled and unpickled by pickling and unpickling + the underlying wrapped types. + """ + # Need a local import to avoid circular import issues + from .module_accelerator import disable_module_accelerator + + with disable_module_accelerator(): + pickled_fast = pickle.dumps(self._fsproxy_fast) + pickled_slow = pickle.dumps(self._fsproxy_slow) + return ( + _PickleConstructor(type(self)), + (), + (pickled_fast, pickled_slow), + ) -class _MethodProxy(_CallableProxyMixin, _IntermediateProxy): + def __setstate__(self, state): + # Need a local import to avoid circular import issues + from .module_accelerator import disable_module_accelerator + + with disable_module_accelerator(): + unpickled_fast = pickle.loads(state[0]) + unpickled_slow = pickle.loads(state[1]) + self._fsproxy_fast = unpickled_fast + self._fsproxy_slow = unpickled_slow + + +def is_bound_method(obj): + return inspect.ismethod(obj) and not inspect.isfunction(obj) + + +def is_function(obj): + return inspect.isfunction(obj) or isinstance(obj, types.FunctionType) + + +class _FastSlowAttribute: """ - Methods of fast-slow proxies are of type _MethodProxy. + A descriptor type used to define attributes of fast-slow proxies. """ + _attr: Any + + def __init__(self, name: str, *, private: bool = False): + self._name = name + self._private = private + self._attr = None + self._doc = None + self._dir = None + + def __get__(self, instance, owner) -> Any: + from .module_accelerator import disable_module_accelerator + + if self._attr is None: + if self._private: + fast_attr = _Unusable() + else: + fast_attr = getattr( + owner._fsproxy_fast, self._name, _Unusable() + ) + + try: + slow_attr = getattr(owner._fsproxy_slow, self._name) + except AttributeError as e: + if instance is not None: + return _maybe_wrap_result( + getattr(instance._fsproxy_slow, self._name), + None, # type: ignore + ) + else: + raise e + + if _is_function_or_method(slow_attr): + self._attr = _MethodProxy(fast_attr, slow_attr) + else: + # for anything else, use a fast-slow attribute: + self._attr, _ = _fast_slow_function_call( + getattr, owner, self._name + ) + + if isinstance( + self._attr, (property, functools.cached_property) + ): + with disable_module_accelerator(): + self._attr.__doc__ = inspect.getdoc(slow_attr) + + if instance is not None: + if isinstance(self._attr, _MethodProxy): + if is_bound_method(self._attr._fsproxy_slow): + return self._attr + else: + return types.MethodType(self._attr, instance) + else: + if self._private: + return _maybe_wrap_result( + getattr(instance._fsproxy_slow, self._name), + None, # type: ignore + ) + return _fast_slow_function_call(getattr, instance, self._name)[ + 0 + ] + return self._attr + + +class _MethodProxy(_FunctionProxy): + def __init__(self, fast, slow): + super().__init__( + fast, + slow, + updated=functools.WRAPPER_UPDATES, + assigned=( + tuple(filter(lambda x: x != "__name__", _WRAPPER_ASSIGNMENTS)) + ), + ) + + def __dir__(self): + return self._fsproxy_slow.__dir__() + + @property + def __doc__(self): + return self._fsproxy_slow.__doc__ + + @property + def __name__(self): + return self._fsproxy_slow.__name__ + + @__name__.setter + def __name__(self, value): + try: + setattr(self._fsproxy_fast, "__name__", value) + except AttributeError: + pass + setattr(self._fsproxy_slow, "__name__", value) + def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: """ @@ -981,10 +1053,6 @@ def _maybe_wrap_result(result: Any, func: Callable, /, *args, **kwargs) -> Any: return type(result)(wrapped) elif isinstance(result, Iterator): return (_maybe_wrap_result(r, lambda x: x, r) for r in result) - elif _is_function_or_method(result): - return _MethodProxy._fsproxy_wrap( - result, method_chain=(func, args, kwargs) - ) else: return result @@ -1081,6 +1149,7 @@ def _replace_closurevars( "__and__", "__bool__", "__call__", + "__getattr__", "__complex__", "__contains__", "__copy__", diff --git a/python/cudf/cudf/pandas/profiler.py b/python/cudf/cudf/pandas/profiler.py index 0124d411e3b..0dbd333ce4f 100644 --- a/python/cudf/cudf/pandas/profiler.py +++ b/python/cudf/cudf/pandas/profiler.py @@ -127,12 +127,7 @@ def get_namespaced_function_name( ], ): if isinstance(func_obj, _MethodProxy): - # Extract classname from method object - type_name = type(func_obj._fsproxy_wrapped.__self__).__name__ - # Explicitly ask for __name__ on _fsproxy_wrapped to avoid - # getting a private attribute and forcing a slow-path copy - func_name = func_obj._fsproxy_wrapped.__name__ - return ".".join([type_name, func_name]) + return func_obj._fsproxy_slow.__qualname__ elif isinstance(func_obj, _FunctionProxy) or issubclass( func_obj, (_FinalProxy, _IntermediateProxy) ): diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh index 6eb28104120..cd9f90d50fe 100755 --- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh +++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh @@ -138,7 +138,7 @@ and not test_eof_states" # TODO: Remove "not db" once a postgres & mysql container is set up on the CI PANDAS_CI="1" timeout 30m python -m pytest -p cudf.pandas \ -v -m "not single_cpu and not db" \ - -k "$TEST_THAT_NEED_MOTO_SERVER and $TEST_THAT_CRASH_PYTEST_WORKERS" \ + -k "$TEST_THAT_NEED_MOTO_SERVER and $TEST_THAT_CRASH_PYTEST_WORKERS and not test_groupby_raises_category_on_category and not test_constructor_no_pandas_array and not test_is_monotonic_na and not test_index_contains and not test_index_contains and not test_frame_op_subclass_nonclass_constructor and not test_round_trip_current" \ --import-mode=importlib \ ${PYTEST_IGNORES} \ "$@" || [ $? = 1 ] # Exit success if exit code was 1 (permit test failures but not other errors) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 9fb0891fa52..e3d4f878ad5 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -379,6 +379,8 @@ def test_pickle_round_trip(dataframe): def test_excel_round_trip(dataframe): + pytest.importorskip("openpyxl") + pdf, df = dataframe excel_pdf = BytesIO() excel_cudf_pandas = BytesIO() @@ -1211,6 +1213,24 @@ def test_func_namespace(): assert xpd.concat is xpd.core.reshape.concat.concat +def test_register_accessor(): + @xpd.api.extensions.register_dataframe_accessor("xyz") + class XYZ: + def __init__(self, obj): + self._obj = obj + + @property + def foo(self): + return "spam" + + # the accessor must be registered with the proxy type, + # not the underlying fast or slow type + assert "xyz" in xpd.DataFrame.__dict__ + + df = xpd.DataFrame() + assert df.xyz.foo == "spam" + + def test_pickle_groupby(dataframe): pdf, df = dataframe pgb = pdf.groupby("a") @@ -1232,6 +1252,18 @@ def test_isinstance_base_offset(): assert isinstance(offset, xpd.tseries.offsets.BaseOffset) +def test_super_attribute_lookup(): + # test that we can use super() to access attributes + # of the base class when subclassing proxy types + + class Foo(xpd.Series): + def max_times_two(self): + return super().max() * 2 + + s = Foo([1, 2, 3]) + assert s.max_times_two() == 6 + + def test_floordiv_array_vs_df(): xarray = xpd.Series([1, 2, 3], dtype="datetime64[ns]").array parray = pd.Series([1, 2, 3], dtype="datetime64[ns]").array diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index 359a2a2c515..588398265f2 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -33,11 +33,11 @@ def test_profiler(): "Timestamp", "DataFrame", "DataFrame.groupby", - "DataFrameGroupBy.sum", + "GroupBy.sum", "DataFrame.sum", "Series.__getitem__", "Timedelta", - "Timestamp.__add__", + "_Timestamp.__add__", } for name, func in per_function_stats.items(): assert (