Skip to content

Commit

Permalink
Make all sort functions encode stability in the signature
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed Feb 14, 2024
1 parent 98a9610 commit b00da09
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 120 deletions.
25 changes: 7 additions & 18 deletions python/cudf/cudf/_lib/pylibcudf/sorting.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ from .column cimport Column
from .table cimport Table


cpdef Column sorted_order(Table source_table, list column_order, list null_precedence)

cpdef Column stable_sorted_order(
cpdef Column sorted_order(
Table source_table,
list column_order,
list null_precedence,
*,
bool stable,
)

cpdef Column rank(
Expand All @@ -34,28 +34,17 @@ cpdef Table segmented_sort_by_key(
Column segment_offsets,
list column_order,
list null_precedence,
)

cpdef Table stable_segmented_sort_by_key(
Table values,
Table keys,
Column segment_offsets,
list column_order,
list null_precedence,
*,
bool stable,
)

cpdef Table sort_by_key(
Table values,
Table keys,
list column_order,
list null_precedence,
)

cpdef Table stable_sort_by_key(
Table values,
Table keys,
list column_order,
list null_precedence,
*,
bool stable,
)

cpdef Table sort(Table source_table, list column_order, list null_precedence)
159 changes: 66 additions & 93 deletions python/cudf/cudf/_lib/pylibcudf/sorting.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector
Expand All @@ -14,41 +15,12 @@ from .column cimport Column
from .table cimport Table


cpdef Column sorted_order(Table source_table, list column_order, list null_precedence):
"""Computes the row indices required to sort the table.
Parameters
----------
source_table : Table
The table to sort.
column_order : List[ColumnOrder]
Whether each column should be sorted in ascending or descending order.
null_precedence : List[NullOrder]
Whether nulls should come before or after non-nulls.
Returns
-------
Column
The row indices required to sort the table.
"""
cdef unique_ptr[column] c_result
cdef vector[order] c_orders = column_order
cdef vector[null_order] c_null_precedence = null_precedence
with nogil:
c_result = move(
cpp_sorting.sorted_order(
source_table.view(),
c_orders,
c_null_precedence,
)
)
return Column.from_libcudf(move(c_result))


cpdef Column stable_sorted_order(
cpdef Column sorted_order(
Table source_table,
list column_order,
list null_precedence,
*,
bool stable,
):
"""Computes the row indices required to sort the table.
Expand All @@ -60,6 +32,8 @@ cpdef Column stable_sorted_order(
Whether each column should be sorted in ascending or descending order.
null_precedence : List[NullOrder]
Whether nulls should come before or after non-nulls.
stable : bool
Whether to maintain input order for equal elements.
Returns
-------
Expand All @@ -70,13 +44,22 @@ cpdef Column stable_sorted_order(
cdef vector[order] c_orders = column_order
cdef vector[null_order] c_null_precedence = null_precedence
with nogil:
c_result = move(
cpp_sorting.stable_sorted_order(
source_table.view(),
c_orders,
c_null_precedence,
if stable:
c_result = move(
cpp_sorting.stable_sorted_order(
source_table.view(),
c_orders,
c_null_precedence,
)
)
else:
c_result = move(
cpp_sorting.sorted_order(
source_table.view(),
c_orders,
c_null_precedence,
)
)
)
return Column.from_libcudf(move(c_result))


Expand Down Expand Up @@ -199,12 +182,14 @@ cpdef Table segmented_sort_by_key(
return Table.from_libcudf(move(c_result))


cpdef Table stable_segmented_sort_by_key(
cpdef Table segmented_sort_by_key(
Table values,
Table keys,
Column segment_offsets,
list column_order,
list null_precedence,
*,
bool stable,
):
"""Sorts the table by key, within segments.
Expand All @@ -220,6 +205,8 @@ cpdef Table stable_segmented_sort_by_key(
Whether each column should be sorted in ascending or descending order.
null_precedence : List[NullOrder]
Whether nulls should come before or after non-nulls.
stable : bool
Whether to maintain input order for equal elements.
Returns
-------
Expand All @@ -230,62 +217,36 @@ cpdef Table stable_segmented_sort_by_key(
cdef vector[order] c_orders = column_order
cdef vector[null_order] c_null_precedence = null_precedence
with nogil:
c_result = move(
cpp_sorting.stable_segmented_sort_by_key(
values.view(),
keys.view(),
segment_offsets.view(),
c_orders,
c_null_precedence,
if stable:
c_result = move(
cpp_sorting.stable_segmented_sort_by_key(
values.view(),
keys.view(),
segment_offsets.view(),
c_orders,
c_null_precedence,
)
)
)
return Table.from_libcudf(move(c_result))


cpdef Table sort_by_key(
Table values,
Table keys,
list column_order,
list null_precedence,
):
"""Sorts the table by key.
Parameters
----------
values : Table
The table to sort.
keys : Table
The table to sort by.
column_order : List[ColumnOrder]
Whether each column should be sorted in ascending or descending order.
null_precedence : List[NullOrder]
Whether nulls should come before or after non-nulls.
Returns
-------
Table
The sorted table.
"""
cdef unique_ptr[table] c_result
cdef vector[order] c_orders = column_order
cdef vector[null_order] c_null_precedence = null_precedence
with nogil:
c_result = move(
cpp_sorting.sort_by_key(
values.view(),
keys.view(),
c_orders,
c_null_precedence,
else:
c_result = move(
cpp_sorting.segmented_sort_by_key(
values.view(),
keys.view(),
segment_offsets.view(),
c_orders,
c_null_precedence,
)
)
)
return Table.from_libcudf(move(c_result))


cpdef Table stable_sort_by_key(
cpdef Table sort_by_key(
Table values,
Table keys,
list column_order,
list null_precedence,
*,
bool stable,
):
"""Sorts the table by key.
Expand All @@ -299,6 +260,8 @@ cpdef Table stable_sort_by_key(
Whether each column should be sorted in ascending or descending order.
null_precedence : List[NullOrder]
Whether nulls should come before or after non-nulls.
stable : bool
Whether to maintain input order for equal elements.
Returns
-------
Expand All @@ -309,14 +272,24 @@ cpdef Table stable_sort_by_key(
cdef vector[order] c_orders = column_order
cdef vector[null_order] c_null_precedence = null_precedence
with nogil:
c_result = move(
cpp_sorting.stable_sort_by_key(
values.view(),
keys.view(),
c_orders,
c_null_precedence,
if stable:
c_result = move(
cpp_sorting.stable_sort_by_key(
values.view(),
keys.view(),
c_orders,
c_null_precedence,
)
)
else:
c_result = move(
cpp_sorting.sort_by_key(
values.view(),
keys.view(),
c_orders,
c_null_precedence,
)
)
)
return Table.from_libcudf(move(c_result))


Expand Down
15 changes: 6 additions & 9 deletions python/cudf/cudf/_lib/sort.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -149,15 +149,15 @@ def order_by(
Column of indices that sorts the table
"""
order = ordering(ascending, repeat(na_position))
func = getattr(pylibcudf.sorting, f"{'stable_' if stable else ''}sorted_order")

return Column.from_pylibcudf(
func(
pylibcudf.sorting.sorted_order(
pylibcudf.Table(
[c.to_pylibcudf(mode="read") for c in columns_from_table],
),
order[0],
order[1],
stable=stable
)
)

Expand Down Expand Up @@ -231,13 +231,13 @@ def sort_by_key(
list of value columns sorted by keys
"""
order = ordering(ascending, na_position)
func = getattr(pylibcudf.sorting, f"{'stable_' if stable else ''}sort_by_key")
return columns_from_pylibcudf_table(
func(
pylibcudf.sorting.sort_by_key(
pylibcudf.Table([c.to_pylibcudf(mode="read") for c in values]),
pylibcudf.Table([c.to_pylibcudf(mode="read") for c in keys]),
order[0],
order[1],
stable=stable,
)
)

Expand Down Expand Up @@ -283,17 +283,14 @@ def segmented_sort_by_key(
column_order or repeat(True, ncol),
null_precedence or repeat("first", ncol),
)
func = getattr(
pylibcudf.sorting,
f"{'stable_' if stable else ''}segmented_sort_by_key"
)
return columns_from_pylibcudf_table(
func(
pylibcudf.sorting.segmented_sort_by_key(
pylibcudf.Table([c.to_pylibcudf(mode="read") for c in values]),
pylibcudf.Table([c.to_pylibcudf(mode="read") for c in keys]),
segment_offsets.to_pylibcudf(mode="read"),
order[0],
order[1],
stable=stable,
)
)

Expand Down

0 comments on commit b00da09

Please sign in to comment.