Skip to content

Commit

Permalink
Add HLL++ evaluation function
Browse files Browse the repository at this point in the history
  • Loading branch information
Chong Gao committed Oct 30, 2024
1 parent d0a55aa commit b6f5cf5
Showing 6 changed files with 258 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/main/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -195,6 +195,7 @@ add_library(
src/HashJni.cpp
src/HistogramJni.cpp
src/HostTableJni.cpp
src/HLLPPJni.cpp
src/JSONUtilsJni.cpp
src/NativeParquetJni.cpp
src/ParseURIJni.cpp
@@ -203,6 +204,7 @@ add_library(
src/SparkResourceAdaptorJni.cpp
src/SubStringIndexJni.cpp
src/ZOrderJni.cpp
src/HLLPP.cu
src/bloom_filter.cu
src/case_when.cu
src/cast_decimal_to_string.cu
108 changes: 108 additions & 0 deletions src/main/cpp/src/HLLPP.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "HLLPP.hpp"

#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <cuco/detail/hyperloglog/finalizer.cuh>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>

namespace spark_rapids_jni {

namespace {

// The number of bits required by a register value. A register value stores a number of zeros.
// XXHash64 value is 64 bits, so it's safe to use 6 bits (values 0..63) to store a register value.
constexpr int REGISTER_VALUE_BITS = 6;

// Mask covering the low REGISTER_VALUE_BITS bits: binary 111111 (0x3F).
// The parentheses are required: binary `-` binds tighter than `<<`, so the
// unparenthesized form `1 << REGISTER_VALUE_BITS - 1` evaluates to `1 << 5`.
constexpr uint64_t MASK = (uint64_t{1} << REGISTER_VALUE_BITS) - 1;

// Number of whole register values packed into one 64-bit long: 64 / 6 = 10.
constexpr int REGISTERS_PER_LONG = 64 / REGISTER_VALUE_BITS;

// Reads one packed register out of a 64-bit word.
//
// long_10_registers: word holding packed register values; slot k starts at bit
//                    REGISTER_VALUE_BITS * k.
// reg_idx:           slot to read within the word.
// Returns the bits of the word selected by MASK at that slot, moved down to
// bit 0 and narrowed to int.
__device__ inline int get_register_value(int64_t const long_10_registers, int reg_idx)
{
  // Bit position at which the requested slot begins.
  auto const bit_offset = REGISTER_VALUE_BITS * reg_idx;
  // MASK moved up so that it lines up with the requested slot.
  int64_t const slot_mask = MASK << bit_offset;
  int64_t const slot_bits = long_10_registers & slot_mask;
  return static_cast<int>(slot_bits >> bit_offset);
}

// Device functor that turns one row of packed sketch words into one estimate.
//
// sketch_longs: one pointer per sketch word column; sketch_longs[w][row] is the
//               w-th packed word of the sketch stored in `row`.
// precision:    number of addressing bits; the sketch has 2^precision registers.
// out:          output buffer; out[row] receives the value returned by the
//               cuco HyperLogLog finalizer for that row.
struct estimate_fn {
  cudf::device_span<int64_t const*> sketch_longs;
  int const precision;
  int64_t* const out;

  // Processes the sketch at row `idx`: walks every register, accumulating
  // sum(2^-register) and the number of registers equal to zero, then hands both
  // to the finalizer.
  __device__ void operator()(cudf::size_type const idx) const
  {
    auto const register_count = 1ull << precision;
    double inverse_power_sum  = 0;
    int empty_registers       = 0;

    auto reg_idx = 0;
    while (reg_idx < register_count) {
      // Registers are packed REGISTERS_PER_LONG to a word, so split the flat
      // register index into (word column, slot inside that word).
      int const word_idx = reg_idx / REGISTERS_PER_LONG;
      int const slot     = reg_idx % REGISTERS_PER_LONG;
      int const rank     = get_register_value(sketch_longs[word_idx][idx], slot);

      inverse_power_sum += double{1} / static_cast<double>(1ull << rank);
      empty_registers += rank == 0;
      ++reg_idx;
    }

    auto const to_estimate = cuco::hyperloglog_ns::detail::finalizer(precision);
    out[idx]               = to_estimate(inverse_power_sum, empty_registers);
  }
};

} // end anonymous namespace

// Computes one approximate distinct-count value per row from packed HLL++ sketches.
//
// input:     STRUCT column whose children are INT64 columns; child w holds the
//            w-th packed word of every row's sketch.
// precision: number of addressing bits (the sketch has 2^precision registers);
//            must be at least 4.
// stream:    CUDA stream on which the copy and the kernel are ordered.
// mr:        device memory resource used for the returned column.
//
// Returns an INT64 column with input.size() rows.
// Throws cudf::logic_error (via CUDF_EXPECTS) when precision is out of range or
// when `input` does not have the expected layout.
std::unique_ptr<cudf::column> estimate_from_hll_sketches(cudf::column_view const& input,
                                                         int precision,
                                                         rmm::cuda_stream_view stream,
                                                         rmm::device_async_resource_ref mr)
{
  CUDF_EXPECTS(precision >= 4, "HLL++ requires at least 4 bits for addressing.");
  // The device functor walks registers with an int index, so 2^precision must fit in an int.
  CUDF_EXPECTS(precision < 31, "HLL++ precision is too large.");

  CUDF_EXPECTS(input.type().id() == cudf::type_id::STRUCT,
               "HLL++ sketches must be a STRUCT column of INT64 children.");

  // Every register must be backed by a child column, otherwise the kernel would
  // read past the end of the pointer array.
  auto const num_registers      = int64_t{1} << precision;
  auto const num_longs_required = (num_registers + REGISTERS_PER_LONG - 1) / REGISTERS_PER_LONG;
  CUDF_EXPECTS(input.num_children() >= num_longs_required,
               "HLL++ sketch does not have enough INT64 children for the given precision.");

  // Collect the device data pointer of each child column on the host.
  std::vector<int64_t const*> input_cols;
  input_cols.reserve(input.num_children());
  for (cudf::size_type i = 0; i < input.num_children(); ++i) {
    auto const child = input.child(i);
    CUDF_EXPECTS(child.type().id() == cudf::type_id::INT64,
                 "HLL++ sketch children must be INT64 columns.");
    // Children of a sliced struct are not sliced themselves; apply the parent's
    // offset so that row 0 of `input` maps to the right child element.
    input_cols.push_back(child.begin<int64_t>() + input.offset());
  }

  auto d_inputs = cudf::detail::make_device_uvector_async(input_cols, stream, mr);
  auto result   = cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT64},
                                          input.size(),
                                          cudf::mask_state::ALL_VALID,
                                          stream,
                                          mr);

  // evaluate from struct<long, ..., long>: one functor invocation per row
  thrust::for_each_n(rmm::exec_policy(stream),
                     thrust::make_counting_iterator(0),
                     input.size(),
                     estimate_fn{d_inputs, precision, result->mutable_view().data<int64_t>()});

  return result;
}

} // namespace spark_rapids_jni
32 changes: 32 additions & 0 deletions src/main/cpp/src/HLLPP.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/resource_ref.hpp>

#include <memory>

namespace spark_rapids_jni {

/**
 * @brief Compute approximate distinct-count values from HyperLogLog++ sketches.
 *
 * Each row of `input` is one sketch. The sketch is read as a struct of INT64
 * children, where every 64-bit value packs several 6-bit register values.
 *
 * @param input Sketch column; a struct column whose children are INT64 columns.
 * @param precision Number of bits used for register addressing; must be at least 4.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource for this operation's device allocations.
 * @return An INT64 column with one estimate per input row.
 * @throws cudf::logic_error if `precision` is less than 4.
 */
std::unique_ptr<cudf::column> estimate_from_hll_sketches(
  cudf::column_view const& input,
  int precision,
  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

}  // namespace spark_rapids_jni
34 changes: 34 additions & 0 deletions src/main/cpp/src/HLLPPJni.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "HLLPP.hpp"
#include "cudf_jni_apis.hpp"

extern "C" {

// JNI entry point for HLLPP.estimateDistinctValueFromSketches.
//
// sketches:  native address of a cudf::column_view holding the sketches.
// precision: number of addressing bits, forwarded unchanged.
// Returns the address of a newly created cudf::column as a jlong; the pointer is
// released to the caller. Returns 0 when `sketches` is null or when an exception
// is caught and reported through the CATCH_STD macro.
JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_HLLPP_estimateDistinctValueFromSketches(
  JNIEnv* env, jclass, jlong sketches, jint precision)
{
  JNI_NULL_CHECK(env, sketches, "Sketch column is null", 0);
  try {
    cudf::jni::auto_set_device(env);
    auto const* input_column = reinterpret_cast<cudf::column_view const*>(sketches);
    auto estimates = spark_rapids_jni::estimate_from_hll_sketches(*input_column, precision);
    return cudf::jni::ptr_as_jlong(estimates.release());
  }
  CATCH_STD(env, 0);
}
}
45 changes: 45 additions & 0 deletions src/main/java/com/nvidia/spark/rapids/jni/HLLPP.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.jni;

import ai.rapids.cudf.ColumnVector;
import ai.rapids.cudf.ColumnView;
import ai.rapids.cudf.NativeDepsLoader;

/**
 * HyperLogLogPlusPlus (HLL++) utilities backed by native code.
 */
public class HLLPP {
  static {
    NativeDepsLoader.loadNativeDeps();
  }

  /**
   * Compute the approximate count distinct values from sketch values.
   * <p>
   * The native implementation reads the sketches as `STRUCT<INT64, ..., INT64>`:
   * each row is one sketch and each child column holds one packed 64-bit word of it.
   *
   * @param input The sketch column, which contains `STRUCT<INT64, ..., INT64>` values.
   * @param precision The number of bits used for addressing; must be at least 4.
   * @return An INT64 column where each value is the approximate count distinct value
   *         of the corresponding sketch. The caller owns the returned column.
   */
  public static ColumnVector estimateDistinctValueFromSketches(ColumnView input, int precision) {
    long resultHandle = estimateDistinctValueFromSketches(input.getNativeView(), precision);
    return new ColumnVector(resultHandle);
  }

  /**
   * Native entry point.
   *
   * @param inputHandle Native view handle of the sketch column.
   * @param precision The number of bits used for addressing.
   * @return Native handle of the resulting column.
   */
  private static native long estimateDistinctValueFromSketches(long inputHandle, int precision);
}
37 changes: 37 additions & 0 deletions src/test/java/com/nvidia/spark/rapids/jni/HLLPPTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.jni;

import ai.rapids.cudf.GroupByAggregation;
import ai.rapids.cudf.Table;

import org.junit.jupiter.api.Test;


public class HLLPPTest {

@Test
void testGroupByHLL() {
// A trivial test:
try (Table input = new Table.TestBuilder().column(1, 2, 3, 1, 2, 2, 1, 3, 3, 2)
.column(0, 1, -2, 3, -4, -5, -6, 7, -8, 9)
.build()){
input.groupBy(0).aggregate(GroupByAggregation.HLLPP(0)
.onColumn(1));
}
}
}

0 comments on commit b6f5cf5

Please sign in to comment.