Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add labeling APIs to pylibcudf #16761

Merged
merged 6 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions docs/cudf/source/developer_guide/pylibcudf.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,38 +186,39 @@ Here is an example of appropriate enum usage.


```cython
# cpp/copying.pxd
# pylibcudf/libcudf/copying.pxd
cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
# cpdef here so that we export both a cdef enum class and a Python enum.Enum.
cpdef enum class out_of_bounds_policy(bool):
NULLIFY
DONT_CHECK


# cpp/copying.pyx
# This file is empty, but is required to compile the Python enum in cpp/copying.pxd
# pylibcudf/libcudf/copying.pyx
# This file is empty, but is required to compile the Python enum in pylibcudf/libcudf/copying.pxd
# Ensure this file is included in pylibcudf/libcudf/CMakeLists.txt


# pylibcudf/copying.pxd

# cimport the enum using the exact name
# Once https://github.com/cython/cython/issues/5609 is resolved,
# this import should instead be
# from cudf._lib.cpp.copying cimport out_of_bounds_policy as OutOfBoundsPolicy
from cudf._lib.cpp.copying cimport out_of_bounds_policy
# from pylibcudf.libcudf.copying cimport out_of_bounds_policy as OutOfBoundsPolicy
from pylibcudf.libcudf.copying cimport out_of_bounds_policy


# pylibcudf/copying.pyx
# Access libcudf.copying members that aren't part of this module's public API via
# this module alias
from cudf._lib.cpp cimport copying as cpp_copying
from cudf._lib.cpp.copying cimport out_of_bounds_policy
from pylibcudf.libcudf cimport copying as cpp_copying
from pylibcudf.libcudf.copying cimport out_of_bounds_policy

# This import exposes the enum in the public API of this module.
# It requires a no-cython-lint tag because it will be unused: all typing of
# parameters etc will need to use the Cython name `out_of_bounds_policy` until
# the Cython bug is resolved.
from cudf._lib.cpp.copying import \
from pylibcudf.libcudf.copying import \
out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint
```

Expand Down
40 changes: 10 additions & 30 deletions python/cudf/cudf/_lib/labeling.pyx
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from cudf.core.buffer import acquire_spill_lock

from libcpp cimport bool as cbool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.labeling cimport inclusive, label_bins as cpp_label_bins
import pylibcudf as plc

from cudf._lib.column cimport Column
from cudf.core.buffer import acquire_spill_lock


# Note that the parameter input shadows a Python built-in in the local scope,
Expand All @@ -19,26 +14,11 @@ from cudf._lib.column cimport Column
@acquire_spill_lock()
def label_bins(Column input, Column left_edges, cbool left_inclusive,
Column right_edges, cbool right_inclusive):
cdef inclusive c_left_inclusive = \
inclusive.YES if left_inclusive else inclusive.NO
cdef inclusive c_right_inclusive = \
inclusive.YES if right_inclusive else inclusive.NO

cdef column_view input_view = input.view()
cdef column_view left_edges_view = left_edges.view()
cdef column_view right_edges_view = right_edges.view()

cdef unique_ptr[column] c_result

with nogil:
c_result = move(
cpp_label_bins(
input_view,
left_edges_view,
c_left_inclusive,
right_edges_view,
c_right_inclusive,
)
)

return Column.from_unique_ptr(move(c_result))
plc_column = plc.labeling.label_bins(
input.to_pylibcudf(mode="read"),
left_edges.to_pylibcudf(mode="read"),
left_inclusive,
right_edges.to_pylibcudf(mode="read"),
right_inclusive
)
return Column.from_pylibcudf(plc_column)
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(cython_sources
groupby.pyx
interop.pyx
join.pyx
labeling.pyx
lists.pyx
merge.pyx
null_mask.pyx
Expand Down
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from . cimport (
filling,
groupby,
join,
labeling,
lists,
merge,
null_mask,
Expand Down
3 changes: 3 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
interop,
io,
join,
labeling,
lists,
merge,
null_mask,
Expand Down Expand Up @@ -67,7 +68,9 @@
"gpumemoryview",
"groupby",
"interop",
"io",
"join",
"labeling",
"lists",
"merge",
"null_mask",
Expand Down
14 changes: 14 additions & 0 deletions python/pylibcudf/pylibcudf/labeling.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from libcpp cimport bool
from pylibcudf.libcudf.labeling cimport inclusive

from .column cimport Column


# Declaration of ``label_bins`` so that other Cython modules can cimport it;
# the implementation and its full docstring live in labeling.pyx.
# ``bool`` here is the type cimported from ``libcpp`` at the top of this file.
cpdef Column label_bins(
Column input,
Column left_edges,
bool left_inclusive,
Column right_edges,
bool right_inclusive
)
65 changes: 65 additions & 0 deletions python/pylibcudf/pylibcudf/labeling.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.libcudf cimport labeling as cpp_labeling
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.labeling cimport inclusive

from pylibcudf.libcudf.labeling import inclusive as Inclusive # no-cython-lint

from .column cimport Column


cpdef Column label_bins(
    Column input,
    Column left_edges,
    bool left_inclusive,
    Column right_edges,
    bool right_inclusive
):
    """Label the elements of ``input`` by the bin each one belongs to.

    Parameters
    ----------
    input : Column
        Elements to be labeled according to the given bins.
    left_edges : Column
        Left edge of each bin.
    left_inclusive : bool
        If true, the left edge of a bin is treated as inclusive.
    right_edges : Column
        Right edge of each bin.
    right_inclusive : bool
        If true, the right edge of a bin is treated as inclusive.

    Returns
    -------
    Column
        Integer labels for the elements of ``input`` according to the
        specified bins.
    """
    cdef inclusive c_left_inclusive
    cdef inclusive c_right_inclusive
    cdef unique_ptr[column] c_result

    # Translate the Python-level booleans into the libcudf ``inclusive`` enum
    # before the GIL is released.
    if left_inclusive:
        c_left_inclusive = inclusive.YES
    else:
        c_left_inclusive = inclusive.NO

    if right_inclusive:
        c_right_inclusive = inclusive.YES
    else:
        c_right_inclusive = inclusive.NO

    with nogil:
        c_result = move(
            cpp_labeling.label_bins(
                input.view(),
                left_edges.view(),
                c_left_inclusive,
                right_edges.view(),
                c_right_inclusive,
            )
        )

    # Hand ownership of the libcudf result over to a pylibcudf Column.
    return Column.from_libcudf(move(c_result))
4 changes: 2 additions & 2 deletions python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# the License.
# =============================================================================

set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx reduce.pyx replace.pyx
round.pyx stream_compaction.pyx types.pyx unary.pyx
set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx labeling.pyx reduce.pyx
replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx
)

set(linked_libraries cudf::cudf)
Expand Down
8 changes: 4 additions & 4 deletions python/pylibcudf/pylibcudf/libcudf/labeling.pxd
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from libcpp cimport int
from libcpp.memory cimport unique_ptr
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view


cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil:
ctypedef enum inclusive:
YES "cudf::inclusive::YES"
NO "cudf::inclusive::NO"
cpdef enum class inclusive(int):
YES
NO

cdef unique_ptr[column] label_bins (
const column_view &input,
Expand Down
Empty file.
25 changes: 25 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_labeling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pylibcudf as plc
import pytest


@pytest.mark.parametrize("left_inclusive", [True, False])
@pytest.mark.parametrize("right_inclusive", [True, False])
def test_label_bins(left_inclusive, right_inclusive):
    """Check ``label_bins`` output, including values that sit on bin edges.

    The bins are ``(0, 4)`` and ``(5, 6)``. The input holds the interior
    values 1, 2, 3 plus the two edges of the first bin (0 and 4), so that
    each inclusivity flag actually changes the expected result. Interior
    values always get label 0; an edge value gets label 0 only when the
    matching flag is true and is expected to be null otherwise (libcudf
    documents null labels for elements that belong to no bin).
    """
    left_edges = plc.interop.from_arrow(pa.array([0, 5]))
    right_edges = plc.interop.from_arrow(pa.array([4, 6]))
    in_col = plc.interop.from_arrow(pa.array([0, 1, 2, 3, 4]))

    result = plc.interop.to_arrow(
        plc.labeling.label_bins(
            in_col, left_edges, left_inclusive, right_edges, right_inclusive
        )
    )

    # First/last entries are the edge values; they depend on the flags.
    on_left_edge = 0 if left_inclusive else None
    on_right_edge = 0 if right_inclusive else None
    expected = pa.chunked_array(
        [[on_left_edge, 0, 0, 0, on_right_edge]], type=pa.int32()
    )
    assert result.equals(expected)


def test_Inclusive_enum():
    """Pin the integer values exposed by ``plc.labeling.Inclusive``.

    ``YES`` must compare equal to 0 and ``NO`` to 1, so an accidental
    change to the enum's values is caught here.
    """
    assert plc.labeling.Inclusive.YES == 0
    assert plc.labeling.Inclusive.NO == 1
Loading