Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement maps_column_view abstraction over LIST<STRUCT<K,V>> #10380

Merged
merged 26 commits into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0702b6d
First attempt at restoring maps_column_view.
mythrocks Feb 27, 2022
fe5b88b
Detail headers added, for lists functions.
mythrocks Mar 1, 2022
04a8c9a
More tests for empties, nulls, slices, etc.
mythrocks Mar 1, 2022
ef8e97b
Formatting.
mythrocks Mar 1, 2022
be700ed
Doxygen for maps_column_view.
mythrocks Mar 1, 2022
7110ad0
Update Java bindings for mapLookup.
mythrocks Mar 3, 2022
9c0742c
Updated libcudf/meta.yaml for new detail header files.
mythrocks Mar 3, 2022
2752d9a
Updated formatting.
mythrocks Mar 3, 2022
1713718
Updated JNI bindings, tests, for integral types.
mythrocks Mar 8, 2022
736b16a
Added contains() to maps_column_view.
mythrocks Mar 8, 2022
62a1cb5
Update JNI bindings for mapContains.
mythrocks Mar 8, 2022
ddc1eed
Formatting.
mythrocks Mar 8, 2022
3014496
Updated JNI tests for MapKeyExistence.
mythrocks Mar 8, 2022
3b39271
Updated CUDF test for maps_column_view::contains().
mythrocks Mar 9, 2022
ed0bd0a
Moved maps_column_view to JNI.
mythrocks Mar 9, 2022
598ac17
Formatting, again.
mythrocks Mar 9, 2022
1910009
Updated copyright dates.
mythrocks Mar 9, 2022
cf722ff
Review comments:
mythrocks Mar 10, 2022
b67d5b0
Changed null offset to use numeric_limit::min(), not max().
mythrocks Mar 10, 2022
2febaaf
Added default for device_memory_resource in detail::extract().
mythrocks Mar 11, 2022
7ea5dbb
Return const references from keys(), values().
mythrocks Mar 14, 2022
631f0f8
Merge remote-tracking branch 'origin/branch-22.04' into maps-column-view
mythrocks Mar 14, 2022
d38648d
clang-format for java/ CMakeLists.txt.
mythrocks Mar 14, 2022
6138074
Alphabetical listing of source files.
mythrocks Mar 14, 2022
5c2c6b6
Revert "Alphabetical listing of source files."
mythrocks Mar 14, 2022
f69cf7f
Revert "clang-format for java/ CMakeLists.txt."
mythrocks Mar 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ test:
- test -f $PREFIX/include/cudf/labeling/label_bins.hpp
- test -f $PREFIX/include/cudf/lists/detail/combine.hpp
- test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/lists/detail/contains.hpp
- test -f $PREFIX/include/cudf/lists/detail/copying.hpp
- test -f $PREFIX/include/cudf/lists/detail/extract.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_factories.hpp
- test -f $PREFIX/include/cudf/lists/detail/drop_list_duplicates.hpp
- test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp
Expand Down
78 changes: 78 additions & 0 deletions cpp/include/cudf/lists/detail/contains.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/lists/contains.hpp>
#include <cudf/lists/lists_column_view.hpp>

namespace cudf {
namespace lists {
namespace detail {

/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
*
* Scalar-key overload in the `detail` namespace: documented by the public API named
* above, with the CUDA stream passed explicitly by the caller. `mr` falls back to
* `rmm::mr::get_current_device_resource()` when omitted.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
cudf::lists::duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
*
* Column-of-keys overload in the `detail` namespace: documented by the public API
* named above, with the CUDA stream passed explicitly by the caller. `mr` falls back
* to `rmm::mr::get_current_device_resource()` when omitted.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
cudf::lists::duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
* cudf::scalar const&,
* rmm::mr::device_memory_resource*)
*
* Scalar-key overload in the `detail` namespace: documented by the public API named
* above, with the CUDA stream passed explicitly by the caller. `mr` falls back to
* `rmm::mr::get_current_device_resource()` when omitted.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> contains(
cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
* cudf::column_view const&,
* rmm::mr::device_memory_resource*)
*
* Column-of-keys overload in the `detail` namespace: documented by the public API
* named above, with the CUDA stream passed explicitly by the caller. `mr` falls back
* to `rmm::mr::get_current_device_resource()` when omitted.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> contains(
cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
} // namespace detail
} // namespace lists
} // namespace cudf
49 changes: 49 additions & 0 deletions cpp/include/cudf/lists/detail/extract.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/lists/extract.hpp>
#include <cudf/lists/lists_column_view.hpp>

namespace cudf {
namespace lists {
namespace detail {

/**
* @copydoc cudf::lists::extract_list_element(lists_column_view, size_type,
* rmm::mr::device_memory_resource*)
*
* Scalar-index overload in the `detail` namespace: a single `index` is supplied for
* the whole column, and the CUDA stream is passed explicitly by the caller. `mr`
* falls back to `rmm::mr::get_current_device_resource()` when omitted.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> extract_list_element(
lists_column_view lists_column,
size_type const index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::lists::extract_list_element(lists_column_view, column_view const&,
* rmm::mr::device_memory_resource*)
*
* Index-column overload in the `detail` namespace: indices are supplied as the
* column `indices`, and the CUDA stream is passed explicitly by the caller. `mr`
* falls back to `rmm::mr::get_current_device_resource()` when omitted.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> extract_list_element(
lists_column_view lists_column,
column_view const& indices,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace lists
} // namespace cudf
61 changes: 30 additions & 31 deletions cpp/src/lists/contains.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/lists/contains.hpp>
#include <cudf/lists/detail/contains.hpp>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/lists/lists_column_device_view.cuh>
#include <cudf/lists/lists_column_view.hpp>
Expand Down Expand Up @@ -251,18 +252,17 @@ std::unique_ptr<column> to_contains(std::unique_ptr<column>&& key_positions,

namespace detail {
/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::index_of(cudf::lists_column_view const&,
* cudf::scalar const&,
* duplicate_find_option,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> index_of(cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return search_key.is_valid(stream)
? cudf::type_dispatcher(search_key.type(),
Expand All @@ -282,18 +282,17 @@ std::unique_ptr<column> index_of(
}

/**
* @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::index_of(cudf::lists_column_view const&,
* cudf::column_view const&,
* duplicate_find_option,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> index_of(
cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
std::unique_ptr<column> index_of(cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
duplicate_find_option find_option,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_EXPECTS(search_keys.size() == lists.size(),
"Number of search keys must match list column size.");
Expand All @@ -316,10 +315,10 @@ std::unique_ptr<column> index_of(
}

/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
* cudf::scalar const&,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::contains(cudf::lists_column_view const&,
* cudf::scalar const&,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
cudf::scalar const& search_key,
Expand All @@ -331,10 +330,10 @@ std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
}

/**
* @copydoc cudf::lists::contains(cudf::lists_column_view const&,
* cudf::column_view const&,
* rmm::mr::device_memory_resource*)
* @param stream CUDA stream used for device memory operations and kernel launches.
* @copydoc cudf::lists::detail::contains(cudf::lists_column_view const&,
* cudf::column_view const&,
* rmm::cuda_stream_view,
* rmm::mr::device_memory_resource*)
*/
std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
cudf::column_view const& search_keys,
Expand Down
35 changes: 28 additions & 7 deletions cpp/src/lists/extract.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@
#include <cudf/copying.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/sequence.hpp>
#include <cudf/lists/detail/extract.hpp>
#include <cudf/lists/detail/gather.cuh>
#include <cudf/lists/extract.hpp>
#include <cudf/scalar/scalar_factories.hpp>
Expand Down Expand Up @@ -107,10 +108,10 @@ std::unique_ptr<cudf::column> make_index_offsets(size_type num_lists, rmm::cuda_
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
template <typename index_t>
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
index_t const& index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::unique_ptr<column> extract_list_element_impl(lists_column_view lists_column,
index_t const& index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const num_lists = lists_column.size();
if (num_lists == 0) { return empty_like(lists_column.child()); }
Expand All @@ -135,6 +136,26 @@ std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
return std::move(extracted_lists->release().children[lists_column_view::child_column_index]);
}

/**
* @copydoc cudf::lists::extract_list_element
*
* Scalar-index overload: forwards `index` unchanged to `extract_list_element_impl`,
* using the stream and memory resource supplied by the caller.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
size_type const index,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::extract_list_element_impl(lists_column, index, stream, mr);
}

/**
 * @brief Index-column overload of `extract_list_element` that takes an explicit stream.
 *
 * Forwards to `extract_list_element_impl` after checking that `indices` has exactly one
 * entry per row of `lists_column`.
 *
 * @throws cudf::logic_error if `indices.size() != lists_column.size()`.
 *
 * @param lists_column The lists column to extract elements from.
 * @param indices Column of indices, one per row of `lists_column`.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * @return The column produced by `extract_list_element_impl` for the given indices.
 */
std::unique_ptr<column> extract_list_element(lists_column_view lists_column,
                                             column_view const& indices,
                                             rmm::cuda_stream_view stream,
                                             rmm::mr::device_memory_resource* mr)
{
  // The public API performs this same check before dispatching. Repeat it here so
  // that callers entering through `detail` (to supply their own stream) cannot
  // bypass the precondition.
  CUDF_EXPECTS(indices.size() == lists_column.size(),
               "Index column must have as many elements as lists column.");
  return detail::extract_list_element_impl(lists_column, indices, stream, mr);
}

} // namespace detail

/**
Expand All @@ -146,7 +167,7 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
size_type index,
rmm::mr::device_memory_resource* mr)
{
return detail::extract_list_element(lists_column, index, rmm::cuda_stream_default, mr);
return detail::extract_list_element_impl(lists_column, index, rmm::cuda_stream_default, mr);
}

/**
Expand All @@ -160,7 +181,7 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
{
CUDF_EXPECTS(indices.size() == lists_column.size(),
"Index column must have as many elements as lists column.");
return detail::extract_list_element(lists_column, indices, rmm::cuda_stream_default, mr);
return detail::extract_list_element_impl(lists_column, indices, rmm::cuda_stream_default, mr);
}

} // namespace lists
Expand Down
27 changes: 16 additions & 11 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -3244,17 +3244,23 @@ public final ColumnVector urlEncode() throws CudfException {
return new ColumnVector(urlEncode(getNativeView()));
}

/** For a column of type List<Struct<String, String>> and a passed in String key, return a string column
* for all the values in the struct that match the key, null otherwise.
* @param key the String scalar to lookup in the column
* @return a string column of values or nulls based on the lookup result
/**
 * Asserts that a type may be used as a map lookup key.
 * EMPTY, LIST and STRUCT key types are rejected. The check is a Java {@code assert},
 * so it only fires when assertions are enabled.
 * @param keyType the type of the lookup key
 */
private static void assertIsSupportedMapKeyType(DType keyType) {
  boolean isSupportedKeyType =
    !keyType.equals(DType.EMPTY) && !keyType.equals(DType.LIST) && !keyType.equals(DType.STRUCT);
  // The message lists every rejected type; the previous text omitted EMPTY.
  assert isSupportedKeyType : "Map lookup by EMPTY, STRUCT and LIST keys is not supported.";
}

/**
* Given a column of type List<Struct<X, Y>> and a key of type X, return a column of type Y,
* where each row in the output column is the Y value corresponding to the X key.
* If the key is not found, the corresponding output value is null.
* @param key the scalar key to lookup in the column
* @return a column of values or nulls based on the lookup result
*/
public final ColumnVector getMapValue(Scalar key) {

assert type.equals(DType.LIST) : "column type must be a LIST";
assert key != null : "target string may not be null";
assert key.getType().equals(DType.STRING) : "target string must be a string scalar";

assert key != null : "Lookup key may not be null";
assertIsSupportedMapKeyType(key.getType());
return new ColumnVector(mapLookup(getNativeView(), key.getScalarHandle()));
}

Expand All @@ -3266,9 +3272,8 @@ public final ColumnVector getMapValue(Scalar key) {
*/
public final ColumnVector getMapKeyExistence(Scalar key) {
assert type.equals(DType.LIST) : "column type must be a LIST";
assert key != null : "target string may not be null";
assert key.getType().equals(DType.STRING) : "target must be a string scalar";

assert key != null : "Lookup key may not be null";
assertIsSupportedMapKeyType(key.getType());
return new ColumnVector(mapContains(getNativeView(), key.getScalarHandle()));
}

Expand Down
1 change: 1 addition & 0 deletions java/src/main/native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ add_library(
src/TableJni.cpp
src/aggregation128_utils.cu
src/map_lookup.cu
src/maps_column_view.cu
src/row_conversion.cu
src/check_nvcomp_output_sizes.cu
)
Expand Down
Loading