diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 63680473c14..b3a8836b193 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -211,7 +211,7 @@ std::unique_ptr scatter( /** * @brief Indicates when to allocate a mask, based on an existing mask. */ -enum class mask_allocation_policy { +enum class mask_allocation_policy : int32_t { NEVER, ///< Do not allocate a null mask, regardless of input RETAIN, ///< Allocate a null mask if the input contains one ALWAYS ///< Allocate a null mask, regardless of input diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index ea6ee76c14a..fbe2c8751dd 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -2,7 +2,7 @@ import pickle -from libc.stdint cimport int32_t, uint8_t, uintptr_t +from libc.stdint cimport uint8_t, uintptr_t from libcpp cimport bool from libcpp.memory cimport make_shared, shared_ptr, unique_ptr from libcpp.utility cimport move @@ -24,7 +24,6 @@ from cudf._lib.utils cimport table_view_from_columns, table_view_from_table from cudf._lib.reduce import minmax from cudf.core.abc import Serializable -from libcpp.functional cimport reference_wrapper from libcpp.memory cimport make_unique cimport cudf._lib.cpp.contiguous_split as cpp_contiguous_split @@ -36,13 +35,11 @@ from cudf._lib.cpp.lists.gather cimport ( ) from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view from cudf._lib.cpp.scalar.scalar cimport scalar -from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type from cudf._lib.utils cimport ( columns_from_pylibcudf_table, columns_from_table_view, - columns_from_unique_ptr, data_from_table_view, table_view_from_columns, ) @@ -116,25 +113,15 @@ def _copy_range(Column input_column, size_type input_begin, size_type input_end, size_type target_begin): - - cdef column_view input_column_view = 
input_column.view() - cdef column_view target_column_view = target_column.view() - cdef size_type c_input_begin = input_begin - cdef size_type c_input_end = input_end - cdef size_type c_target_begin = target_begin - - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_copying.copy_range( - input_column_view, - target_column_view, - c_input_begin, - c_input_end, - c_target_begin) + return Column.from_pylibcudf( + pylibcudf.copying.copy_range( + input_column.to_pylibcudf(mode="read"), + target_column.to_pylibcudf(mode="read"), + input_begin, + input_end, + target_begin ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() @@ -184,48 +171,6 @@ def gather( return columns_from_pylibcudf_table(tbl) -cdef scatter_scalar(list source_device_slrs, - column_view scatter_map, - table_view target_table): - cdef vector[reference_wrapper[constscalar]] c_source - cdef DeviceScalar d_slr - cdef unique_ptr[table] c_result - - c_source.reserve(len(source_device_slrs)) - for d_slr in source_device_slrs: - c_source.push_back( - reference_wrapper[constscalar](d_slr.get_raw_ptr()[0]) - ) - - with nogil: - c_result = move( - cpp_copying.scatter( - c_source, - scatter_map, - target_table, - ) - ) - - return columns_from_unique_ptr(move(c_result)) - - -cdef scatter_column(list source_columns, - column_view scatter_map, - table_view target_table): - cdef table_view c_source = table_view_from_columns(source_columns) - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.scatter( - c_source, - scatter_map, - target_table, - ) - ) - return columns_from_unique_ptr(move(c_result)) - - @acquire_spill_lock() def scatter(list sources, Column scatter_map, list target_columns, bool bounds_check=True): @@ -243,9 +188,6 @@ def scatter(list sources, Column scatter_map, list target_columns, if len(sources) == 0: return [] - cdef column_view scatter_map_view = scatter_map.view() - cdef table_view target_table_view = 
table_view_from_columns(target_columns) - if bounds_check: n_rows = len(target_columns[0]) if not ( @@ -257,62 +199,47 @@ def scatter(list sources, Column scatter_map, list target_columns, ) if isinstance(sources[0], Column): - return scatter_column( - sources, scatter_map_view, target_table_view + tbl = pylibcudf.copying.scatter_table( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]), + scatter_map.to_pylibcudf(mode="read"), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), ) else: - source_scalars = [as_device_scalar(slr) for slr in sources] - return scatter_scalar( - source_scalars, scatter_map_view, target_table_view + tbl = pylibcudf.copying.scatter_scalars( + [( as_device_scalar(slr)).c_value for slr in sources], + scatter_map.to_pylibcudf(mode="read"), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), ) + return columns_from_pylibcudf_table(tbl) + @acquire_spill_lock() def column_empty_like(Column input_column): - - cdef column_view input_column_view = input_column.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_copying.empty_like(input_column_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + pylibcudf.copying.empty_column_like( + input_column.to_pylibcudf(mode="read") + ) + ) @acquire_spill_lock() def column_allocate_like(Column input_column, size=None): - - cdef size_type c_size = 0 - cdef column_view input_column_view = input_column.view() - cdef unique_ptr[column] c_result - - if size is None: - with nogil: - c_result = move(cpp_copying.allocate_like( - input_column_view, - cpp_copying.mask_allocation_policy.RETAIN) - ) - else: - c_size = size - with nogil: - c_result = move(cpp_copying.allocate_like( - input_column_view, - c_size, - cpp_copying.mask_allocation_policy.RETAIN) - ) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + pylibcudf.copying.allocate_like( + 
input_column.to_pylibcudf(mode="read"), + pylibcudf.copying.MaskAllocationPolicy.RETAIN, size, + ) + ) @acquire_spill_lock() def columns_empty_like(list input_columns): - cdef table_view input_table_view = table_view_from_columns(input_columns) - cdef unique_ptr[table] c_result - - with nogil: - c_result = move(cpp_copying.empty_like(input_table_view)) - - return columns_from_unique_ptr(move(c_result)) + return columns_from_pylibcudf_table( + pylibcudf.copying.empty_table_like( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]) + ) + ) @acquire_spill_lock() @@ -513,70 +440,15 @@ def _copy_if_else_scalar_scalar(DeviceScalar lhs, @acquire_spill_lock() def copy_if_else(object lhs, object rhs, Column boolean_mask): - - if isinstance(lhs, Column): - if isinstance(rhs, Column): - return _copy_if_else_column_column(lhs, rhs, boolean_mask) - else: - return _copy_if_else_column_scalar( - lhs, as_device_scalar(rhs), boolean_mask) - else: - if isinstance(rhs, Column): - return _copy_if_else_scalar_column( - as_device_scalar(lhs), rhs, boolean_mask) - else: - if lhs is None and rhs is None: - return lhs - - return _copy_if_else_scalar_scalar( - as_device_scalar(lhs), as_device_scalar(rhs), boolean_mask) - - -def _boolean_mask_scatter_columns(list input_columns, list target_columns, - Column boolean_mask): - - cdef table_view input_table_view = table_view_from_columns(input_columns) - cdef table_view target_table_view = table_view_from_columns(target_columns) - cdef column_view boolean_mask_view = boolean_mask.view() - - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.boolean_mask_scatter( - input_table_view, - target_table_view, - boolean_mask_view - ) - ) - - return columns_from_unique_ptr(move(c_result)) - - -def _boolean_mask_scatter_scalar(list input_scalars, list target_columns, - Column boolean_mask): - - cdef vector[reference_wrapper[constscalar]] input_scalar_vector - input_scalar_vector.reserve(len(input_scalars)) - cdef DeviceScalar scl - for scl in 
input_scalars: - input_scalar_vector.push_back(reference_wrapper[constscalar]( - scl.get_raw_ptr()[0])) - cdef table_view target_table_view = table_view_from_columns(target_columns) - cdef column_view boolean_mask_view = boolean_mask.view() - - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.boolean_mask_scatter( - input_scalar_vector, - target_table_view, - boolean_mask_view - ) + return Column.from_pylibcudf( + pylibcudf.copying.copy_if_else( + lhs.to_pylibcudf(mode="read") if isinstance(lhs, Column) + else ( as_device_scalar(lhs)).c_value, + rhs.to_pylibcudf(mode="read") if isinstance(rhs, Column) + else ( as_device_scalar(rhs)).c_value, + boolean_mask.to_pylibcudf(mode="read"), ) - - return columns_from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() @@ -598,23 +470,23 @@ def boolean_mask_scatter(list input_, list target_columns, return [] if isinstance(input_[0], Column): - return _boolean_mask_scatter_columns( - input_, - target_columns, - boolean_mask + tbl = pylibcudf.copying.boolean_mask_table_scatter( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + boolean_mask.to_pylibcudf(mode="read"), ) else: - scalar_list = [as_device_scalar(i) for i in input_] - return _boolean_mask_scatter_scalar( - scalar_list, - target_columns, - boolean_mask + tbl = pylibcudf.copying.boolean_mask_scalars_scatter( + [( as_device_scalar(i)).c_value for i in input_], + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + boolean_mask.to_pylibcudf(mode="read"), ) + return columns_from_pylibcudf_table(tbl) + @acquire_spill_lock() def shift(Column input, int offset, object fill_value=None): - cdef DeviceScalar fill if isinstance(fill_value, DeviceScalar): @@ -622,21 +494,12 @@ def shift(Column input, int offset, object fill_value=None): else: fill = as_device_scalar(fill_value, input.dtype) - cdef column_view c_input = 
input.view() - cdef int32_t c_offset = offset - cdef const scalar* c_fill_value = fill.get_raw_ptr() - cdef unique_ptr[column] c_output - - with nogil: - c_output = move( - cpp_copying.shift( - c_input, - c_offset, - c_fill_value[0] - ) - ) - - return Column.from_unique_ptr(move(c_output)) + col = pylibcudf.copying.shift( + input.to_pylibcudf(mode="read"), + offset, + fill.c_value, + ) + return Column.from_pylibcudf(col) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index 5637b55ac1c..f3e5c0aec72 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -36,118 +36,118 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ) except + cdef unique_ptr[table] scatter ( - table_view source_table, - column_view scatter_map, - table_view target_table, + const table_view& source_table, + const column_view& scatter_map, + const table_view& target_table, ) except + cdef unique_ptr[table] scatter ( - vector[reference_wrapper[constscalar]] source_scalars, - column_view indices, - table_view target, + const vector[reference_wrapper[constscalar]]& source_scalars, + const column_view& indices, + const table_view& target, ) except + - ctypedef enum mask_allocation_policy: - NEVER 'cudf::mask_allocation_policy::NEVER', - RETAIN 'cudf::mask_allocation_policy::RETAIN', - ALWAYS 'cudf::mask_allocation_policy::ALWAYS' + cpdef enum class mask_allocation_policy(int32_t): + NEVER + RETAIN + ALWAYS cdef unique_ptr[column] empty_like ( - column_view input_column + const column_view& input_column ) except + cdef unique_ptr[column] allocate_like ( - column_view input_column, + const column_view& input_column, mask_allocation_policy policy ) except + cdef unique_ptr[column] allocate_like ( - column_view input_column, + const column_view& input_column, size_type size, mask_allocation_policy policy ) except + cdef unique_ptr[table] empty_like ( - table_view input_table + const 
table_view& input_table ) except + cdef void copy_range_in_place ( - column_view input_column, - mutable_column_view target_column, + const column_view& input_column, + mutable_column_view& target_column, size_type input_begin, size_type input_end, size_type target_begin ) except + cdef unique_ptr[column] copy_range ( - column_view input_column, - column_view target_column, + const column_view& input_column, + const column_view& target_column, size_type input_begin, size_type input_end, size_type target_begin ) except + cdef vector[column_view] slice ( - column_view input_column, + const column_view& input_column, vector[size_type] indices ) except + cdef vector[table_view] slice ( - table_view input_table, + const table_view& input_table, vector[size_type] indices ) except + cdef vector[column_view] split ( - column_view input_column, + const column_view& input_column, vector[size_type] splits ) except + cdef vector[table_view] split ( - table_view input_table, + const table_view& input_table, vector[size_type] splits ) except + cdef unique_ptr[column] copy_if_else ( - column_view lhs, - column_view rhs, - column_view boolean_mask + const column_view& lhs, + const column_view& rhs, + const column_view& boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - scalar lhs, - column_view rhs, - column_view boolean_mask + const scalar& lhs, + const column_view& rhs, + const column_view& boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - column_view lhs, - scalar rhs, - column_view boolean_mask + const column_view& lhs, + const scalar& rhs, + const column_view& boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - scalar lhs, - scalar rhs, - column_view boolean_mask + const scalar& lhs, + const scalar& rhs, + const column_view& boolean_mask ) except + cdef unique_ptr[table] boolean_mask_scatter ( - table_view input, - table_view target, - column_view boolean_mask + const table_view& input, + const table_view& target, + const column_view& 
boolean_mask ) except + cdef unique_ptr[table] boolean_mask_scatter ( - vector[reference_wrapper[constscalar]] input, - table_view target, - column_view boolean_mask + const vector[reference_wrapper[constscalar]]& input, + const table_view& target, + const column_view& boolean_mask ) except + cdef unique_ptr[scalar] get_element ( - column_view input, + const column_view& input, size_type index ) except + - ctypedef enum sample_with_replacement: - FALSE 'cudf::sample_with_replacement::FALSE', - TRUE 'cudf::sample_with_replacement::TRUE', + cpdef enum class sample_with_replacement(bool): + FALSE + TRUE diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index d57be650710..db0e42f5804 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -2,9 +2,11 @@ from libcpp cimport bool as cbool -from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.copying cimport mask_allocation_policy, out_of_bounds_policy +from cudf._lib.cpp.types cimport size_type from .column cimport Column +from .scalar cimport Scalar from .table cimport Table @@ -13,3 +15,29 @@ cpdef Table gather( Column gather_map, out_of_bounds_policy bounds_policy ) + +cpdef Table scatter_table(Table source, Column scatter_map, Table target_table) + +cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table) + +cpdef object empty_column_like(Column input) + +cpdef object empty_table_like(Table input) + +cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) + +cpdef Column copy_range( + Column input_column, + Column target_column, + size_type input_begin, + size_type input_end, + size_type target_begin, +) + +cpdef Column shift(Column input, size_type offset, Scalar fill_values) + +cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) + +cpdef Table boolean_mask_table_scatter(Table input, Table target, Column 
boolean_mask) + +cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index a27b44b3107..634aed3e6e5 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -1,23 +1,47 @@ # Copyright (c) 2023, NVIDIA CORPORATION. +from cython.operator import dereference + +from libcpp.functional cimport reference_wrapper from libcpp.memory cimport unique_ptr from libcpp.utility cimport move +from libcpp.vector cimport vector # TODO: We want to make cpp a more full-featured package so that we can access # directly from that. It will make namespacing much cleaner in pylibcudf. What # we really want here would be # cimport libcudf... libcudf.copying.algo(...) from cudf._lib.cpp cimport copying as cpp_copying -from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.copying cimport mask_allocation_policy, out_of_bounds_policy +from cudf._lib.cpp.scalar.scalar cimport scalar +from cudf._lib.cpp.table.table cimport table +from cudf._lib.cpp.types cimport size_type +from cudf._lib.cpp.copying import \ + mask_allocation_policy as MaskAllocationPolicy # no-cython-lint from cudf._lib.cpp.copying import \ out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint -from cudf._lib.cpp.table.table cimport table - from .column cimport Column from .table cimport Table +# This is a workaround for +# https://github.com/cython/cython/issues/4180 +# when creating reference_wrapper[constscalar] in the constructor +ctypedef const scalar constscalar + + +cdef vector[reference_wrapper[const scalar]] _as_vector(list source): + """Make a vector of reference_wrapper[const scalar] from a list of scalars.""" + cdef vector[reference_wrapper[const scalar]] c_scalars + c_scalars.reserve(len(source)) + cdef Scalar slr + for slr in source: + 
c_scalars.push_back( + reference_wrapper[constscalar](dereference((slr).c_obj))) + return c_scalars + # TODO: Is it OK to reference the corresponding libcudf algorithm in the # documentation? Otherwise there's a lot of room for duplication. @@ -55,3 +79,187 @@ cpdef Table gather( ) ) return Table.from_libcudf(move(c_result)) + + +cpdef Table scatter_table(Table source, Column scatter_map, Table target_table): + cdef unique_ptr[table] c_result + + with nogil: + c_result = move( + cpp_copying.scatter( + source.view(), + scatter_map.view(), + target_table.view(), + ) + ) + + return Table.from_libcudf(move(c_result)) + + +# TODO: Could generalize list to sequence +cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table): + cdef vector[reference_wrapper[const scalar]] source_scalars = \ + _as_vector(source) + + cdef unique_ptr[table] c_result + with nogil: + c_result = move( + cpp_copying.scatter( + source_scalars, + scatter_map.view(), + target_table.view(), + ) + ) + + return Table.from_libcudf(move(c_result)) + + +cpdef object empty_column_like(Column input): + cdef unique_ptr[column] c_column_result + with nogil: + c_column_result = move( + cpp_copying.empty_like( + ( input).view(), + ) + ) + return Column.from_libcudf(move(c_column_result)) + + +cpdef object empty_table_like(Table input): + cdef unique_ptr[table] c_table_result + with nogil: + c_table_result = move( + cpp_copying.empty_like( + (
input).view(), + ) + ) + return Table.from_libcudf(move(c_table_result)) + + +cpdef Column allocate_like( + Column input_column, mask_allocation_policy policy, size=None +): + cdef unique_ptr[column] c_result + cdef size_type c_size = size if size is not None else input_column.size() + + with nogil: + c_result = move( + cpp_copying.allocate_like( + input_column.view(), + c_size, + policy, + ) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column copy_range( + Column input_column, + Column target_column, + size_type input_begin, + size_type input_end, + size_type target_begin, +): + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_copying.copy_range( + input_column.view(), + target_column.view(), + input_begin, + input_end, + target_begin) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column shift(Column input, size_type offset, Scalar fill_values): + cdef unique_ptr[column] c_result + with nogil: + c_result = move( + cpp_copying.shift( + input.view(), + offset, + dereference(fill_values.c_obj) + ) + ) + return Column.from_libcudf(move(c_result)) + + +cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): + cdef unique_ptr[column] result + + if isinstance(lhs, Column) and isinstance(rhs, Column): + with nogil: + result = move( + cpp_copying.copy_if_else( + ( lhs).view(), + ( rhs).view(), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Column) and isinstance(rhs, Scalar): + with nogil: + result = move( + cpp_copying.copy_if_else( + ( lhs).view(), + dereference(( rhs).c_obj), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Scalar) and isinstance(rhs, Column): + with nogil: + result = move( + cpp_copying.copy_if_else( + dereference(( lhs).c_obj), + ( rhs).view(), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Scalar) and isinstance(rhs, Scalar): + with nogil: + result = move( + cpp_copying.copy_if_else( + dereference(( lhs).c_obj), + dereference(( rhs).c_obj), + boolean_mask.view() + 
) + ) + else: + raise ValueError(f"Invalid arguments {lhs} and {rhs}") + + return Column.from_libcudf(move(result)) + + +cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask): + cdef unique_ptr[table] result + + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + (
input).view(), + target.view(), + boolean_mask.view() + ) + ) + + return Table.from_libcudf(move(result)) + + +# TODO: Could generalize list to sequence +cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask): + cdef vector[reference_wrapper[const scalar]] source_scalars = _as_vector(input) + + cdef unique_ptr[table] result + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + source_scalars, + target.view(), + boolean_mask.view(), + ) + ) + + return Table.from_libcudf(move(result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd index 09d853d832f..0edc934ca22 100644 --- a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd @@ -21,7 +21,7 @@ cdef class Scalar: # needed for deallocation cdef DeviceMemoryResource mr - cdef const scalar* get(self) except * + cdef const scalar* get(self) noexcept nogil cpdef DataType type(self) cpdef bool is_valid(self) diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx index 04f588bd3e6..965f10999f2 100644 --- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx @@ -108,7 +108,7 @@ cdef class Scalar: return pa.pyarrow_wrap_scalar(c_result) - cdef const scalar* get(self) except *: + cdef const scalar* get(self) noexcept nogil: return self.c_obj.get() cpdef DataType type(self): diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index 929f8b447ab..d87104bf168 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -3,6 +3,7 @@ from enum import IntEnum import numpy as np +import pandas as pd from libcpp.memory cimport make_shared, shared_ptr @@ -270,9 +271,13 @@ cpdef dtype_to_pylibcudf_type(dtype): else: tid = pylibcudf.TypeId.DECIMAL32 return pylibcudf.DataType(tid, -dtype.scale) - return pylibcudf.DataType( - 
SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[np.dtype(dtype)] - ) + + # libcudf types don't support localization so convert to the base type + if isinstance(dtype, pd.DatetimeTZDtype): + dtype = np.dtype(f"