Merge branch 'branch-25.02' into dask-cudf-clip

rjzamora authored Dec 9, 2024
2 parents a7c4af2 + ebad043 commit 2468c49
Showing 44 changed files with 418 additions and 778 deletions.
18 changes: 17 additions & 1 deletion cpp/include/cudf/detail/utilities/device_operators.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -83,7 +83,11 @@ struct DeviceSum {
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
static constexpr T identity()
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("fixed_point does not yet support device operator identity");
#else
CUDF_UNREACHABLE("fixed_point does not yet support device operator identity");
#endif
return T{};
}
};
@@ -141,7 +145,11 @@ struct DeviceMin {
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
static constexpr T identity()
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("fixed_point does not yet support DeviceMin identity");
#else
CUDF_UNREACHABLE("fixed_point does not yet support DeviceMin identity");
#endif
return cuda::std::numeric_limits<T>::max();
}

@@ -189,7 +197,11 @@ struct DeviceMax {
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
static constexpr T identity()
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("fixed_point does not yet support DeviceMax identity");
#else
CUDF_UNREACHABLE("fixed_point does not yet support DeviceMax identity");
#endif
return cuda::std::numeric_limits<T>::lowest();
}

@@ -225,7 +237,11 @@ struct DeviceProduct {
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
static constexpr T identity()
{
#ifndef __CUDA_ARCH__
CUDF_FAIL("fixed_point does not yet support DeviceProduct identity");
#else
CUDF_UNREACHABLE("fixed_point does not yet support DeviceProduct identity");
#endif
return T{1, numeric::scale_type{0}};
}
};
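The pattern repeated in each hunk above relies on `__CUDA_ARCH__` being defined only during the device compilation pass: the host pass keeps the throwing CUDF_FAIL, while the device pass gets the trapping CUDF_UNREACHABLE, because exceptions cannot be thrown from device code. A minimal standalone sketch of the same idea, using plain throw and __trap() in place of the cuDF macros:

#include <stdexcept>

// nvcc compiles this function twice: the host pass (no __CUDA_ARCH__)
// sees the throw, while the device pass sees __trap(), which aborts the
// kernel, since device code cannot throw exceptions.
__host__ __device__ inline int unsupported_identity()
{
#ifndef __CUDA_ARCH__
  throw std::logic_error("identity not supported for this type");
#else
  __trap();
#endif
  return 0;  // unreachable; satisfies the return-path check in both passes
}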
20 changes: 12 additions & 8 deletions cpp/include/cudf/detail/utilities/integer_utils.hpp
@@ -1,7 +1,7 @@
/*
* Copyright 2019 BlazingDB, Inc.
* Copyright 2019 Eyal Rozenberg <eyalroz@blazingdb.com>
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -134,16 +134,20 @@ constexpr I div_rounding_up_safe(std::integral_constant<bool, true>, I dividend,
} // namespace detail

/**
* Divides the left-hand-side by the right-hand-side, rounding up
* @brief Divides the left-hand-side by the right-hand-side, rounding up
* to an integral multiple of the right-hand-side, e.g. (9,5) -> 2 , (10,5) -> 2, (11,5) -> 3.
*
* @param dividend the number to divide
* @param divisor the number of by which to divide
* @return The least integer multiple of {@link divisor} which is greater than or equal to
* the non-integral division dividend/divisor.
* The result is undefined if `divisor == 0` or
* if `divisor == -1` and `dividend == min<I>()`.
*
* Will not overflow, and may _or may not_ be slower than the intuitive
* approach of using `(dividend + divisor - 1) / divisor`.
*
* @note will not overflow, and may _or may not_ be slower than the intuitive
* approach of using (dividend + divisor - 1) / divisor
* @tparam I Integer type for `dividend`, `divisor`, and the return type
* @param dividend The number to divide
* @param divisor The number by which to divide
* @return The least integer multiple of `divisor` which is greater than or equal to
* the non-integral division `dividend/divisor`
*/
template <typename I>
constexpr I div_rounding_up_safe(I dividend, I divisor) noexcept
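As a worked example of the documented contract (a sketch of the documented behavior for non-negative operands, not cuDF's actual implementation), the round-up division can be written without the overflow-prone `dividend + divisor - 1` intermediate:

#include <cassert>
#include <cstdint>

// Overflow-safe round-up division for non-negative integers: skips the
// (dividend + divisor - 1) intermediate, which can wrap near the top of
// the type's range.
template <typename I>
constexpr I div_rounding_up(I dividend, I divisor) noexcept
{
  return dividend / divisor + (dividend % divisor != 0 ? I{1} : I{0});
}

int main()
{
  assert(div_rounding_up(9, 5) == 2);   // the doc comment's examples
  assert(div_rounding_up(10, 5) == 2);
  assert(div_rounding_up(11, 5) == 3);
  // The naive formula wraps to 0 for this input; this form stays correct:
  assert(div_rounding_up<std::uint32_t>(4294967295u, 2u) == 2147483648u);
  return 0;
}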
2 changes: 2 additions & 0 deletions cpp/include/cudf/utilities/span.hpp
@@ -417,7 +417,9 @@ class base_2dspan {
constexpr base_2dspan(RowType<T, dynamic_extent> flat_view, size_t columns)
: _flat{flat_view}, _size{columns == 0 ? 0 : flat_view.size() / columns, columns}
{
#ifndef __CUDA_ARCH__
CUDF_EXPECTS(_size.first * _size.second == flat_view.size(), "Invalid 2D span size");
#endif
}

/**
2 changes: 1 addition & 1 deletion cpp/src/io/orc/writer_impl.cu
@@ -506,7 +506,7 @@ size_t max_varint_size()
return cudf::util::div_rounding_up_unsafe(sizeof(T) * 8, 7);
}

constexpr size_t RLE_stream_size(TypeKind kind, size_t count)
size_t RLE_stream_size(TypeKind kind, size_t count)
{
using cudf::util::div_rounding_up_unsafe;
constexpr auto byte_rle_max_len = 128;
6 changes: 3 additions & 3 deletions cpp/src/io/utilities/time_utils.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,7 +32,7 @@ static const __device__ __constant__ int32_t powers_of_ten[10] = {

struct get_period {
template <typename T>
constexpr int32_t operator()()
int32_t operator()()
{
if constexpr (is_chrono<T>()) { return T::period::den; }
CUDF_FAIL("Invalid, non chrono type");
@@ -42,7 +42,7 @@ struct get_period {
/**
* @brief Function that translates cuDF time unit to clock frequency
*/
constexpr int32_t to_clockrate(type_id timestamp_type_id)
inline int32_t to_clockrate(type_id timestamp_type_id)
{
return timestamp_type_id == type_id::EMPTY
? 0
11 changes: 7 additions & 4 deletions java/src/main/java/ai/rapids/cudf/Rmm.java
@@ -206,7 +206,8 @@ private static void setGlobalValsFromResource(RmmDeviceMemoryResource resource)
* {@link RmmAllocationMode#CUDA_DEFAULT},
* {@link RmmAllocationMode#POOL},
* {@link RmmAllocationMode#ARENA},
* {@link RmmAllocationMode#CUDA_ASYNC} and
* {@link RmmAllocationMode#CUDA_ASYNC},
* {@link RmmAllocationMode#CUDA_ASYNC_FABRIC} and
* {@link RmmAllocationMode#CUDA_MANAGED_MEMORY}
* @param logConf How to do logging or null if you don't want to
* @param poolSize The initial pool size in bytes
@@ -221,6 +222,7 @@ public static synchronized void initialize(int allocationMode, LogConf logConf,
boolean isPool = (allocationMode & RmmAllocationMode.POOL) != 0;
boolean isArena = (allocationMode & RmmAllocationMode.ARENA) != 0;
boolean isAsync = (allocationMode & RmmAllocationMode.CUDA_ASYNC) != 0;
boolean isAsyncFabric = (allocationMode & RmmAllocationMode.CUDA_ASYNC_FABRIC) != 0;
boolean isManaged = (allocationMode & RmmAllocationMode.CUDA_MANAGED_MEMORY) != 0;

if (isAsync && isManaged) {
@@ -246,6 +248,9 @@
} else if (isAsync) {
resource = new RmmLimitingResourceAdaptor<>(
new RmmCudaAsyncMemoryResource(poolSize, poolSize), poolSize, 512);
} else if (isAsyncFabric) {
resource = new RmmLimitingResourceAdaptor<>(
new RmmCudaAsyncMemoryResource(poolSize, poolSize, true), poolSize, 512);
} else if (isManaged) {
resource = new RmmManagedMemoryResource();
} else {
@@ -521,7 +526,6 @@ public static DeviceMemoryBuffer alloc(long size, Cuda.Stream stream) {

private static native long allocInternal(long size, long stream) throws RmmException;


static native void free(long ptr, long length, long stream) throws RmmException;

/**
@@ -562,7 +566,7 @@ static native long newArenaMemoryResource(long childHandle,

static native void releaseArenaMemoryResource(long handle);

static native long newCudaAsyncMemoryResource(long size, long release) throws RmmException;
static native long newCudaAsyncMemoryResource(long size, long release, boolean fabric) throws RmmException;

static native void releaseCudaAsyncMemoryResource(long handle);

@@ -575,7 +579,6 @@ static native long newLoggingResourceAdaptor(long handle, int type, String path,

static native void releaseLoggingResourceAdaptor(long handle);


static native long newTrackingResourceAdaptor(long handle, long alignment) throws RmmException;

static native void releaseTrackingResourceAdaptor(long handle);
7 changes: 6 additions & 1 deletion java/src/main/java/ai/rapids/cudf/RmmAllocationMode.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -36,4 +36,9 @@ public class RmmAllocationMode {
* Use CUDA async suballocation strategy
*/
public static final int CUDA_ASYNC = 0x00000008;
/**
* Use CUDA async suballocation strategy with fabric handles that are
* peer accessible with read-write access
*/
public static final int CUDA_ASYNC_FABRIC = 0x00000010;
}
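For context on what the new fabric mode requests at the CUDA level, here is a hedged sketch (it assumes CUDA 12.3 or newer, where cudaMemHandleTypeFabric was introduced; the names are CUDA runtime API identifiers, not part of this commit):

#include <cuda_runtime_api.h>

// A memory pool whose allocations can be exported via fabric handles and
// mapped read-write by peers; the async pool behind CUDA_ASYNC_FABRIC is
// configured along these lines.
cudaMemPool_t make_fabric_pool(int device)
{
  cudaMemPoolProps props{};
  props.allocType     = cudaMemAllocationTypePinned;
  props.handleTypes   = cudaMemHandleTypeFabric;   // peer-accessible export handle
  props.location.type = cudaMemLocationTypeDevice;
  props.location.id   = device;
  cudaMemPool_t pool{};
  cudaMemPoolCreate(&pool, &props);  // error handling omitted in this sketch
  return pool;
}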
15 changes: 13 additions & 2 deletions java/src/main/java/ai/rapids/cudf/RmmCudaAsyncMemoryResource.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,9 +30,20 @@ public class RmmCudaAsyncMemoryResource implements RmmDeviceMemoryResource {
* @param releaseThreshold size in bytes for when memory is released back to cuda
*/
public RmmCudaAsyncMemoryResource(long size, long releaseThreshold) {
this(size, releaseThreshold, false);
}

/**
* Create a new async memory resource
* @param size the initial size of the pool
* @param releaseThreshold size in bytes for when memory is released back to cuda
* @param fabric if true request peer read+write accessible fabric handles when
* creating the pool
*/
public RmmCudaAsyncMemoryResource(long size, long releaseThreshold, boolean fabric) {
this.size = size;
this.releaseThreshold = releaseThreshold;
handle = Rmm.newCudaAsyncMemoryResource(size, releaseThreshold);
handle = Rmm.newCudaAsyncMemoryResource(size, releaseThreshold, fabric);
}

@Override
14 changes: 9 additions & 5 deletions java/src/main/native/src/RmmJni.cpp
@@ -772,14 +772,18 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_releaseArenaMemoryResource(JNIEnv
CATCH_STD(env, )
}

JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_newCudaAsyncMemoryResource(JNIEnv* env,
jclass clazz,
jlong init,
jlong release)
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_newCudaAsyncMemoryResource(
JNIEnv* env, jclass clazz, jlong init, jlong release, jboolean fabric)
{
try {
cudf::jni::auto_set_device(env);
auto ret = new rmm::mr::cuda_async_memory_resource(init, release);

auto handle_type =
fabric ? std::optional{rmm::mr::cuda_async_memory_resource::allocation_handle_type::fabric}
: std::nullopt;

auto ret = new rmm::mr::cuda_async_memory_resource(init, release, handle_type);

return reinterpret_cast<jlong>(ret);
}
CATCH_STD(env, 0)
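Stripped of the JNI plumbing, the new native body reduces to the following sketch (it mirrors the hunk above and assumes an RMM version whose cuda_async_memory_resource constructor accepts an optional allocation_handle_type, as this commit uses):

#include <cstddef>
#include <optional>

#include <rmm/mr/device/cuda_async_memory_resource.hpp>

// std::nullopt keeps RMM's default handle type; the fabric handle type
// requests peer read/write accessible allocations.
inline rmm::mr::cuda_async_memory_resource* make_async_mr(std::size_t initial_size,
                                                          std::size_t release_threshold,
                                                          bool fabric)
{
  auto handle_type =
    fabric ? std::optional{rmm::mr::cuda_async_memory_resource::allocation_handle_type::fabric}
           : std::nullopt;
  return new rmm::mr::cuda_async_memory_resource(initial_size, release_threshold, handle_type);
}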
2 changes: 0 additions & 2 deletions python/cudf/cudf/_lib/CMakeLists.txt
@@ -25,7 +25,6 @@ set(cython_sources
stream_compaction.pyx
string_casting.pyx
strings_udf.pyx
transform.pyx
types.pyx
utils.pyx
)
@@ -46,4 +45,3 @@ target_link_libraries(interop PUBLIC nanoarrow)

add_subdirectory(io)
add_subdirectory(nvtext)
add_subdirectory(strings)
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
@@ -12,7 +12,6 @@
sort,
stream_compaction,
string_casting,
strings,
strings_udf,
)

46 changes: 1 addition & 45 deletions python/cudf/cudf/_lib/parquet.pyx
@@ -20,11 +20,8 @@ from cudf._lib.utils cimport _data_from_columns, data_from_pylibcudf_io

from cudf._lib.utils import _index_level_name, generate_pandas_metadata

from libc.stdint cimport int64_t, uint8_t
from libc.stdint cimport int64_t
from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from pylibcudf.expressions cimport Expression
from pylibcudf.io.parquet cimport ChunkedParquetReader
@@ -47,47 +44,6 @@ from pylibcudf cimport Table
from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT
from pylibcudf.io.types cimport TableInputMetadata, SinkInfo, ColumnInMetadata
from pylibcudf.io.parquet cimport ParquetChunkedWriter
from cython.operator cimport dereference


cdef class BufferArrayFromVector:
cdef Py_ssize_t length
cdef unique_ptr[vector[uint8_t]] in_vec

# these two things declare part of the buffer interface
cdef Py_ssize_t shape[1]
cdef Py_ssize_t strides[1]

@staticmethod
cdef BufferArrayFromVector from_unique_ptr(
unique_ptr[vector[uint8_t]] in_vec
):
cdef BufferArrayFromVector buf = BufferArrayFromVector()
buf.in_vec = move(in_vec)
buf.length = dereference(buf.in_vec).size()
return buf

def __getbuffer__(self, Py_buffer *buffer, int flags):
cdef Py_ssize_t itemsize = sizeof(uint8_t)

self.shape[0] = self.length
self.strides[0] = 1

buffer.buf = dereference(self.in_vec).data()

buffer.format = NULL # byte
buffer.internal = NULL
buffer.itemsize = itemsize
buffer.len = self.length * itemsize # product(shape) * itemsize
buffer.ndim = 1
buffer.obj = self
buffer.readonly = 0
buffer.shape = self.shape
buffer.strides = self.strides
buffer.suboffsets = NULL

def __releasebuffer__(self, Py_buffer *buffer):
pass


def _parse_metadata(meta):
15 changes: 0 additions & 15 deletions python/cudf/cudf/_lib/strings/CMakeLists.txt

This file was deleted.

15 changes: 0 additions & 15 deletions python/cudf/cudf/_lib/strings/__init__.py
@@ -32,18 +32,3 @@
detokenize,
tokenize_with_vocabulary,
)
from cudf._lib.strings.convert.convert_fixed_point import to_decimal
from cudf._lib.strings.convert.convert_floats import is_float
from cudf._lib.strings.convert.convert_integers import is_integer
from cudf._lib.strings.convert.convert_urls import url_decode, url_encode
from cudf._lib.strings.split.partition import partition, rpartition
from cudf._lib.strings.split.split import (
rsplit,
rsplit_re,
rsplit_record,
rsplit_record_re,
split,
split_re,
split_record,
split_record_re,
)