Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: getting to polars test suite 100% pass rate #16150

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
84e7c4e
Handle case of broadcasting empty list of columns
wence- Jul 1, 2024
27ac28a
Create regex program during StringFunction init
wence- Jul 1, 2024
a4dbd0d
Match ordering requirements of polars for left join
wence- Jul 1, 2024
2a628da
Allow specifying exceptions to catch in execute_with_cudf
wence- Jul 1, 2024
917b5a7
Fix bug in documented behaviour of with_columns
wence- Jul 1, 2024
672b356
Allow dataframe to have overlapping names
wence- Jul 1, 2024
9465011
Cast count aggs to correct dtype in translation
wence- Jul 1, 2024
124dc5c
Raise on unsupported nested types
wence- Jul 1, 2024
72c7883
Handle empty column name in conversion to polars
wence- Jul 1, 2024
dc0cb3f
Raise for unsupported cast to/from strings
wence- Jul 1, 2024
16d7b86
WIP: Fix bug in HConcat
wence- Jul 1, 2024
cbc493e
Translate BinOp.Add between strings to ConcatHorizontal
wence- Jul 1, 2024
ffb0008
Only produce row_index if the schema demands it in Scan
wence- Jul 2, 2024
a4ee080
Expose type traits in pylibcudf
wence- Jul 4, 2024
4fb4ea9
Use new pylibcudf type traits in polars interpreter
wence- Jul 4, 2024
460439b
Alphabetise
wence- Jul 4, 2024
afd5941
MaskState not in all
wence- Jul 4, 2024
23f93cb
So bad at the alphabet
wence- Jul 4, 2024
53ca18c
Link into docs and fix sphinx errors
wence- Jul 5, 2024
b879b25
Merge branch 'wence/fea/pylibcudf-type-traits' into wence/fea/polars-…
wence- Jul 5, 2024
964ff72
Avoid out of bounds access in read_csv
wence- Jul 5, 2024
79a2605
WIP: Handling csv reader options
wence- Jul 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,28 @@ This page provides API documentation for pylibcudf.
filling
gpumemoryview
groupby
io/index.rst
interop
join
lists
merge
quantiles
reduce
replace
reshape
rolling
round
scalar
search
stream_compaction
sorting
replace
stream_compaction
table
traits
types
unary

.. toctree::
:maxdepth: 2
:caption: Subpackages

io/index.rst
strings/index.rst
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
======
traits
======

.. automodule:: cudf._lib.pylibcudf.traits
:members:
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ def read_csv(
col_name = df._data.names[index]
df._data[col_name] = df._data[col_name].astype(col_dtype)

if names is not None and isinstance(names[0], (int)):
if names is not None and len(names) and isinstance(names[0], (int)):
df.columns = [int(x) for x in df._data]

# Set index if the index_col parameter is passed
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ set(cython_sources
stream_compaction.pyx
sorting.pyx
table.pyx
traits.pyx
types.pyx
unary.pyx
utils.pyx
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ from . cimport (
sorting,
stream_compaction,
strings,
traits,
types,
unary,
)
Expand Down Expand Up @@ -54,12 +55,14 @@ __all__ = [
"quantiles",
"reduce",
"replace",
"reshape",
"rolling",
"round",
"search",
"stream_compaction",
"strings",
"sorting",
"traits",
"types",
"unary",
]
4 changes: 4 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
sorting,
stream_compaction,
strings,
traits,
types,
unary,
)
Expand All @@ -35,6 +36,7 @@
__all__ = [
"Column",
"DataType",
"MaskState",
"Scalar",
"Table",
"TypeId",
Expand All @@ -54,12 +56,14 @@
"quantiles",
"reduce",
"replace",
"reshape",
"rolling",
"round",
"search",
"stream_compaction",
"strings",
"sorting",
"traits",
"types",
"unary",
]
27 changes: 27 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.vector cimport vector

from cudf._lib.pylibcudf.libcudf.types cimport data_type


# Declarations of the type-trait predicates exposed by the C++ header
# ``cudf/utilities/traits.hpp`` (namespace ``cudf``). The block is declared
# ``nogil``, so these functions may be called without holding the GIL.
cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil:
    # Predicates that take a single data_type.
    cdef bool is_relationally_comparable(data_type)
    cdef bool is_equality_comparable(data_type)
    cdef bool is_numeric(data_type)
    cdef bool is_index_type(data_type)
    cdef bool is_unsigned(data_type)
    cdef bool is_integral(data_type)
    cdef bool is_integral_not_bool(data_type)
    cdef bool is_floating_point(data_type)
    cdef bool is_boolean(data_type)
    cdef bool is_timestamp(data_type)
    cdef bool is_fixed_point(data_type)
    cdef bool is_duration(data_type)
    cdef bool is_chrono(data_type)
    cdef bool is_dictionary(data_type)
    cdef bool is_fixed_width(data_type)
    cdef bool is_compound(data_type)
    cdef bool is_nested(data_type)
    # Predicate that takes a pair of data_types.
    cdef bool is_bit_castable(data_type, data_type)
25 changes: 25 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/traits.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool

from .types cimport DataType


# Signatures of the type-trait predicates defined in traits.pyx.
# Each one is ``cpdef`` and returns a C++ ``bool``; all but the last take a
# single ``DataType``.
cpdef bool is_relationally_comparable(DataType typ)
cpdef bool is_equality_comparable(DataType typ)
cpdef bool is_numeric(DataType typ)
cpdef bool is_index_type(DataType typ)
cpdef bool is_unsigned(DataType typ)
cpdef bool is_integral(DataType typ)
cpdef bool is_integral_not_bool(DataType typ)
cpdef bool is_floating_point(DataType typ)
cpdef bool is_boolean(DataType typ)
cpdef bool is_timestamp(DataType typ)
cpdef bool is_fixed_point(DataType typ)
cpdef bool is_duration(DataType typ)
cpdef bool is_chrono(DataType typ)
cpdef bool is_dictionary(DataType typ)
cpdef bool is_fixed_width(DataType typ)
cpdef bool is_compound(DataType typ)
cpdef bool is_nested(DataType typ)
# Two-argument predicate: takes a source and a target type.
cpdef bool is_bit_castable(DataType source, DataType target)
151 changes: 151 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/traits.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool

from cudf._lib.pylibcudf.libcudf.utilities cimport traits

from .types cimport DataType


cpdef bool is_relationally_comparable(DataType typ):
    """Report whether a data type supports relational comparisons.

    Forwards to :cpp:func:`is_relationally_comparable`; see that function
    for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_relationally_comparable(typ.c_obj)
    return result


cpdef bool is_equality_comparable(DataType typ):
    """Report whether a data type supports equality comparisons.

    Forwards to :cpp:func:`is_equality_comparable`; see that function
    for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_equality_comparable(typ.c_obj)
    return result


cpdef bool is_numeric(DataType typ):
    """Report whether a data type is numeric.

    Forwards to :cpp:func:`is_numeric`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_numeric(typ.c_obj)
    return result


cpdef bool is_index_type(DataType typ):
    """Report whether a data type is an index type.

    Forwards to :cpp:func:`is_index_type`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_index_type(typ.c_obj)
    return result


cpdef bool is_unsigned(DataType typ):
    """Report whether a data type is an unsigned type.

    Forwards to :cpp:func:`is_unsigned`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_unsigned(typ.c_obj)
    return result


cpdef bool is_integral(DataType typ):
    """Report whether a data type is an integral type.

    Forwards to :cpp:func:`is_integral`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_integral(typ.c_obj)
    return result


cpdef bool is_integral_not_bool(DataType typ):
    """Report whether a data type is an integral type other than boolean.

    Forwards to :cpp:func:`is_integral_not_bool`; see that function
    for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_integral_not_bool(typ.c_obj)
    return result


cpdef bool is_floating_point(DataType typ):
    """Report whether a data type is a floating point type.

    Forwards to :cpp:func:`is_floating_point`; see that function
    for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_floating_point(typ.c_obj)
    return result


cpdef bool is_boolean(DataType typ):
    """Report whether a data type is a boolean type.

    Forwards to :cpp:func:`is_boolean`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_boolean(typ.c_obj)
    return result


cpdef bool is_timestamp(DataType typ):
    """Report whether a data type is a timestamp type.

    Forwards to :cpp:func:`is_timestamp`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_timestamp(typ.c_obj)
    return result


cpdef bool is_fixed_point(DataType typ):
    """Report whether a data type is a fixed point type.

    Forwards to :cpp:func:`is_fixed_point`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_fixed_point(typ.c_obj)
    return result


cpdef bool is_duration(DataType typ):
    """Report whether a data type is a duration type.

    Forwards to :cpp:func:`is_duration`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_duration(typ.c_obj)
    return result


cpdef bool is_chrono(DataType typ):
    """Report whether a data type is a chrono type.

    Forwards to :cpp:func:`is_chrono`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_chrono(typ.c_obj)
    return result


cpdef bool is_dictionary(DataType typ):
    """Report whether a data type is a dictionary type.

    Forwards to :cpp:func:`is_dictionary`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_dictionary(typ.c_obj)
    return result


cpdef bool is_fixed_width(DataType typ):
    """Report whether a data type is a fixed width type.

    Forwards to :cpp:func:`is_fixed_width`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_fixed_width(typ.c_obj)
    return result


cpdef bool is_compound(DataType typ):
    """Report whether a data type is a compound type.

    Forwards to :cpp:func:`is_compound`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_compound(typ.c_obj)
    return result


cpdef bool is_nested(DataType typ):
    """Report whether a data type is a nested type.

    Forwards to :cpp:func:`is_nested`; see that function for details.

    Parameters
    ----------
    typ : DataType
        The data type to query.

    Returns
    -------
    bool
        The value returned by the underlying trait for ``typ``.
    """
    cdef bool result = traits.is_nested(typ.c_obj)
    return result


cpdef bool is_bit_castable(DataType source, DataType target):
    """Report whether ``source`` is bit-castable to ``target``.

    Forwards to :cpp:func:`is_bit_castable`; see that function for details.

    Parameters
    ----------
    source : DataType
        The type to cast from.
    target : DataType
        The type to cast to.

    Returns
    -------
    bool
        The value returned by the underlying trait for the pair
        ``(source, target)``.
    """
    cdef bool result = traits.is_bit_castable(source.c_obj, target.c_obj)
    return result
39 changes: 0 additions & 39 deletions python/cudf/cudf/pylibcudf_tests/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,49 +102,10 @@ def cudf_raises(expected_exception: BaseException, *args, **kwargs):
return pytest.raises(expected_exception, *args, **kwargs)


# TODO: Consider moving these type utilities into pylibcudf.types itself.
def is_signed_integer(plc_dtype: plc.DataType):
    """Return whether the type id value lies in the INT8..INT64 range.

    The check is a numeric range test on ``TypeId`` values, inclusive at
    both ends.
    """
    lower = plc.TypeId.INT8.value
    upper = plc.TypeId.INT64.value
    id_value = plc_dtype.id().value
    return lower <= id_value and id_value <= upper


def is_integer(plc_dtype: plc.DataType):
    """Return whether the type id is one of the INT*/UINT* type ids.

    Membership is tested against the 8-, 16-, 32- and 64-bit signed and
    unsigned members of ``plc.TypeId``.
    """
    integer_ids = (
        plc.TypeId.INT8,
        plc.TypeId.INT16,
        plc.TypeId.INT32,
        plc.TypeId.INT64,
        plc.TypeId.UINT8,
        plc.TypeId.UINT16,
        plc.TypeId.UINT32,
        plc.TypeId.UINT64,
    )
    return plc_dtype.id() in integer_ids


def is_floating(plc_dtype: plc.DataType):
    """Return whether the type id is FLOAT32 or FLOAT64."""
    floating_ids = (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64)
    return plc_dtype.id() in floating_ids


def is_boolean(plc_dtype: plc.DataType):
    """Return whether the type id equals ``plc.TypeId.BOOL8``."""
    type_id = plc_dtype.id()
    return type_id == plc.TypeId.BOOL8


def is_string(plc_dtype: plc.DataType):
    """Return whether the type id equals ``plc.TypeId.STRING``."""
    type_id = plc_dtype.id()
    return type_id == plc.TypeId.STRING


def is_fixed_width(plc_dtype: plc.DataType):
    """Return whether the type is integer, floating or boolean.

    Combines ``is_integer``, ``is_floating`` and ``is_boolean``; the
    checks run in that order and stop at the first one that succeeds.
    """
    if is_integer(plc_dtype) or is_floating(plc_dtype):
        return True
    return is_boolean(plc_dtype)


def nesting_level(typ) -> tuple[int, int]:
"""Return list and struct nesting of a pyarrow type."""
if isinstance(typ, pa.ListType):
Expand Down
Loading
Loading