Skip to content

Commit

Permalink
Complete the initial fix
Browse files Browse the repository at this point in the history
  • Loading branch information
kingcrimsontianyu committed Dec 10, 2024
1 parent bc3f71c commit 16f8173
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 20 deletions.
28 changes: 14 additions & 14 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -799,20 +799,20 @@ set_source_files_properties(
PROPERTIES COMPILE_DEFINITIONS "_FILE_OFFSET_BITS=64"
)

set_property(
SOURCE
src/io/orc/dict_enc.cu
src/io/orc/reader_impl.cu
src/io/orc/reader_impl_chunking.cu
src/io/orc/reader_impl_decode.cu
src/io/orc/stats_enc.cu
src/io/orc/stripe_data.cu
src/io/orc/stripe_enc.cu
src/io/orc/stripe_init.cu
src/io/orc/writer_impl.cu
APPEND
PROPERTY COMPILE_OPTIONS "-g;-G"
)
# set_property(
# SOURCE
# src/io/orc/dict_enc.cu
# src/io/orc/reader_impl.cu
# src/io/orc/reader_impl_chunking.cu
# src/io/orc/reader_impl_decode.cu
# src/io/orc/stats_enc.cu
# src/io/orc/stripe_data.cu
# src/io/orc/stripe_enc.cu
# src/io/orc/stripe_init.cu
# src/io/orc/writer_impl.cu
# APPEND
# PROPERTY COMPILE_OPTIONS "-g;-G"
# )

set_property(
SOURCE src/io/parquet/writer_impl.cu
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/orc_io/debug/breakpoints.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
break /home/coder/cudf/cpp/src/io/orc/stripe_data.cu:167
break /home/coder/cudf/cpp/src/io/orc/stripe_data.cu:210

# break /home/coder/cudf/cpp/src/io/orc/stripe_data.cu:1398
# break /home/coder/cudf/cpp/src/io/orc/stripe_data.cu:646
Expand Down
36 changes: 31 additions & 5 deletions cpp/src/io/orc/stripe_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,6 @@ class run_cache {

__forceinline__ __device__ void write_to_cache(int64_t* src)
{
// Block until the src data, generated by the 1st warp, for the thread block are ready.
__syncthreads();
const auto tid = threadIdx.x;

// All threads in the block take a uniform code path.
Expand All @@ -190,10 +188,33 @@ class run_cache {
__syncthreads();
if (tid == 0) { _status = status::DISABLED; }
}
__syncthreads();
}

__forceinline__ __device__ void read_from_cache([[maybe_unused]] uint64_t* dst) {}
__forceinline__ __device__ void read_from_cache(int64_t* dst, orc_rlev2_state_s* rle)
{
const auto tid = threadIdx.x;

// All threads in the block take a uniform code path.
// _reusable_length ranges between [0, 512]
if (_status == status::CAN_READ_FROM_CACHE and _reusable_length > 0) {
// First, shift the data up
const auto dst_idx = tid + _reusable_length;
const auto v = (dst_idx < rle->num_vals + _reusable_length) ? dst[tid] : 0;
__syncthreads();

if (dst_idx < rle->num_vals + _reusable_length) { dst[dst_idx] = v; }
__syncthreads();

// Second, insert the cached data
if (tid < _reusable_length) { dst[tid] = _buf[tid]; }
__syncthreads();

if (tid == 0) {
_status = status::DISABLED;
rle->num_vals += _reusable_length;
}
}
}

private:
status _status;
Expand Down Expand Up @@ -940,9 +961,14 @@ static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs,
}
__syncwarp();
}
__syncthreads();
if constexpr (cuda::std::is_same_v<T, int64_t>) {
if (run_cache_bs != nullptr) { run_cache_bs->write_to_cache(vals); }
if (run_cache_bs != nullptr) {
run_cache_bs->read_from_cache(vals, rle);
run_cache_bs->write_to_cache(vals);
}
}
__syncthreads();
return rle->num_vals;
}

Expand Down

0 comments on commit 16f8173

Please sign in to comment.