Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

STRING order-by column for RANGE window functions #13143

Merged
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
866069e
Use `column_device_view.element()` instead of `.data()` for window ra…
mythrocks Apr 7, 2023
4683e3f
Review comments:
mythrocks Apr 12, 2023
6f7da5b
Merge remote-tracking branch 'origin/branch-23.06' into refactor-grou…
mythrocks Apr 12, 2023
f449c91
Working!
mythrocks Apr 13, 2023
f4eea1b
Spell out order-by column.
mythrocks Apr 13, 2023
5edf8b8
Merge remote-tracking branch 'mythrocks/refactor-grouped-range-query'…
mythrocks Apr 13, 2023
db18ed2
Merge remote-tracking branch 'origin/branch-23.06' into refactor-grou…
mythrocks Apr 13, 2023
5f56fb2
Merge remote-tracking branch 'mythrocks/refactor-grouped-range-query'…
mythrocks Apr 13, 2023
8044767
Merge remote-tracking branch 'origin/branch-23.06' into refactor-grou…
mythrocks Apr 14, 2023
dbde97b
Merge remote-tracking branch 'mythrocks/refactor-grouped-range-query'…
mythrocks Apr 14, 2023
96d6a04
Working tests for ASC STRING order-by column.
mythrocks Apr 14, 2023
e752d64
Merge remote-tracking branch 'origin/branch-23.06' into string-orderb…
mythrocks Apr 14, 2023
7f48119
Minor test refactor. Current-row tests.
mythrocks Apr 14, 2023
c9f1a3b
Const, where applicable.
mythrocks Apr 14, 2023
e07c019
Null orderby tests.
mythrocks Apr 14, 2023
a19b1b9
Unpartitioned cases, for ASC, NO_NULL.
mythrocks Apr 14, 2023
3933c71
Unpartitioned cases, for all.
mythrocks Apr 14, 2023
a84e88e
Formatting.
mythrocks Apr 17, 2023
182bf2d
Merge remote-tracking branch 'origin/branch-23.06' into string-orderb…
mythrocks Apr 17, 2023
57426b3
Removed cruft. Added check for string window bounds.
mythrocks Apr 17, 2023
3d3e170
Updated copyright date.
mythrocks Apr 17, 2023
ab45abd
Merge remote-tracking branch 'origin/branch-23.06' into string-orderb…
mythrocks Apr 18, 2023
21c78f3
Merge remote-tracking branch 'origin/branch-23.06' into string-orderb…
mythrocks Apr 19, 2023
e24b7dc
Review changes.
mythrocks Apr 19, 2023
9afef05
Switch to CUDF_TEST_EXPECT_COLUMNS_EQUAL.
mythrocks Apr 19, 2023
ae9b837
Merge remote-tracking branch 'origin/branch-23.06' into string-orderb…
mythrocks Apr 20, 2023
75ee17f
Merge branch 'branch-23.06' into string-orderby-range-window
ttnghia Apr 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions cpp/include/cudf/rolling/range_window_bounds.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -26,15 +26,27 @@ namespace cudf {
*
* Similar to `window_bounds` in `grouped_rolling_window()`, `range_window_bounds`
* represents window boundaries for use with `grouped_range_rolling_window()`.
* A window may be specified as either of the following:
* A window may be specified as one of the following:
* 1. A fixed-width numeric scalar value. E.g.
* a) A `DURATION_DAYS` scalar, for use with a `TIMESTAMP_DAYS` orderby column
* b) An `INT32` scalar, for use with an `INT32` orderby column
* 2. "unbounded", indicating that the bounds stretch to the first/last
* row in the group.
* 3. "current row", indicating that the bounds end at the first/last
* row in the group that matches the value of the current row.
*/
struct range_window_bounds {
public:
/**
* @brief The type of range_window_bounds.
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
*/
enum class extent : int32_t {
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
CURRENT_ROW = 0, /// Bounds defined as the first/last row that matches the current row.
BOUNDED, /// Bounds defined as the first/last row that falls within
/// a specified range from the current row.
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
UNBOUNDED /// Bounds stretching to the first/last row in the entire group.
};

/**
* @brief Factory method to construct a bounded window boundary.
*
Expand All @@ -43,6 +55,23 @@ struct range_window_bounds {
*/
static range_window_bounds get(scalar const& boundary);

/**
* @brief Factory method to construct a window boundary
* limited to the value of the current row
*
* @param type The datatype of the window boundary
* @return A "current row" window boundary object
*/
static range_window_bounds current_row(data_type type);

/**
* @brief Whether or not the window is bounded to the current row
*
* This holds only when the boundary's extent is `extent::CURRENT_ROW`,
* e.g. for boundaries produced by the `current_row()` factory.
*
* @return true If window is bounded to the current row
* @return false If window is not bounded to the current row
*/
[[nodiscard]] bool is_current_row() const { return _extent == extent::CURRENT_ROW; }

/**
* @brief Factory method to construct an unbounded window boundary.
*
Expand All @@ -57,7 +86,7 @@ struct range_window_bounds {
* @return true If window is unbounded
* @return false If window is of finite bounds
*/
[[nodiscard]] bool is_unbounded() const { return _is_unbounded; }
[[nodiscard]] bool is_unbounded() const { return _extent == extent::UNBOUNDED; }

/**
* @brief Returns the underlying scalar value for the bounds
Expand All @@ -70,10 +99,10 @@ struct range_window_bounds {
range_window_bounds() = default; // Required for use as return types from dispatch functors.

private:
const bool _is_unbounded{true};
const extent _extent{extent::UNBOUNDED};
std::shared_ptr<scalar> _range_scalar{nullptr}; // To enable copy construction/assignment.

range_window_bounds(bool is_unbounded_, std::unique_ptr<scalar> range_scalar_);
range_window_bounds(extent extent_, std::unique_ptr<scalar> range_scalar_);
};

} // namespace cudf
5 changes: 3 additions & 2 deletions cpp/src/rolling/detail/range_window_bounds.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@ constexpr bool is_supported_range_type()
}

/// Checks if the specified type is a supported target type,
/// as an orderby column, for comparisons with a range_window_bounds scalar.
/// as an order-by column, for comparisons with a range_window_bounds scalar.
template <typename ColumnType>
constexpr bool is_supported_order_by_column_type()
{
return cudf::is_timestamp<ColumnType>() || cudf::is_fixed_point<ColumnType>() ||
(std::is_integral_v<ColumnType> && !cudf::is_boolean<ColumnType>());
(std::is_integral_v<ColumnType> && !cudf::is_boolean<ColumnType>()) ||
std::is_same_v<ColumnType, cudf::string_view>;
}

/// Range-comparable representation type for an orderby column type.
Expand Down
31 changes: 25 additions & 6 deletions cpp/src/rolling/grouped_rolling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,11 @@ __device__ ElementT compute_lowest_in_window(ElementIter orderby_iter,
size_type idx,
ElementT delta)
{
return subtract_safe(orderby_iter[idx], delta);
if constexpr (std::is_same_v<ElementT, cudf::string_view>) {
return orderby_iter[idx];
} else {
return subtract_safe(orderby_iter[idx], delta);
}
}

/**
Expand All @@ -291,7 +295,11 @@ __device__ ElementT compute_highest_in_window(ElementIter orderby_iter,
size_type idx,
ElementT delta)
{
return add_safe(orderby_iter[idx], delta);
if constexpr (std::is_same_v<ElementT, cudf::string_view>) {
return orderby_iter[idx];
} else {
return add_safe(orderby_iter[idx], delta);
}
}

/**
Expand Down Expand Up @@ -869,10 +877,21 @@ std::unique_ptr<column> grouped_range_rolling_window_impl(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto preceding_value =
detail::range_comparable_value<OrderByT>(preceding_window, orderby_column.type(), stream);
auto following_value =
detail::range_comparable_value<OrderByT>(following_window, orderby_column.type(), stream);
auto [preceding_value, following_value] = [&] {
if constexpr (std::is_same_v<OrderByT, cudf::string_view>) {
CUDF_EXPECTS(
preceding_window.is_unbounded() || preceding_window.is_current_row(),
"For STRING order-by column, preceding range has to be either UNBOUNDED or CURRENT ROW.");
CUDF_EXPECTS(
following_window.is_unbounded() || following_window.is_current_row(),
"For STRING order-by column, following range has to be either UNBOUNDED or CURRENT ROW.");
return std::pair{cudf::string_view{}, cudf::string_view{}};
} else {
return std::pair{
detail::range_comparable_value<OrderByT>(preceding_window, orderby_column.type(), stream),
detail::range_comparable_value<OrderByT>(following_window, orderby_column.type(), stream)};
}
}();

if (order_of_orderby_column == cudf::order::ASCENDING) {
return group_offsets.is_empty() ? range_window_ASC(input,
Expand Down
20 changes: 13 additions & 7 deletions cpp/src/rolling/range_window_bounds.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -62,23 +62,29 @@ struct range_scalar_constructor {

} // namespace

range_window_bounds::range_window_bounds(bool is_unbounded_, std::unique_ptr<scalar> range_scalar_)
: _is_unbounded{is_unbounded_}, _range_scalar{std::move(range_scalar_)}
range_window_bounds::range_window_bounds(extent extent_, std::unique_ptr<scalar> range_scalar_)
: _extent{extent_}, _range_scalar{std::move(range_scalar_)}
{
CUDF_EXPECTS(_range_scalar.get(), "Range window scalar cannot be null.");
CUDF_EXPECTS(_is_unbounded || _range_scalar->is_valid(),
"Bounded Range window scalar must be valid.");
CUDF_EXPECTS(
_extent == extent::UNBOUNDED || _extent == extent::CURRENT_ROW || _range_scalar->is_valid(),
"Bounded Range window scalar must be valid.");
}

range_window_bounds range_window_bounds::unbounded(data_type type)
{
return range_window_bounds(true, make_default_constructed_scalar(type));
return range_window_bounds(extent::UNBOUNDED, make_default_constructed_scalar(type));
}

// Factory for a "current row" window boundary: the result is tagged with
// extent::CURRENT_ROW. The scalar returned by make_default_constructed_scalar(type)
// is passed along because the constructor rejects a null scalar pointer; for
// CURRENT_ROW the constructor does not additionally require that scalar to be valid.
range_window_bounds range_window_bounds::current_row(data_type type)
{
return range_window_bounds(extent::CURRENT_ROW, make_default_constructed_scalar(type));
}

range_window_bounds range_window_bounds::get(scalar const& boundary)
{
return range_window_bounds{
false, cudf::type_dispatcher(boundary.type(), range_scalar_constructor{}, boundary)};
extent::BOUNDED, cudf::type_dispatcher(boundary.type(), range_scalar_constructor{}, boundary)};
}

} // namespace cudf
Loading