Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
fixed accessing anonymous struct members
Browse files Browse the repository at this point in the history
  • Loading branch information
elstehle committed Oct 11, 2021
1 parent d3086fc commit 4f44fae
Showing 1 changed file with 25 additions and 24 deletions.
49 changes: 25 additions & 24 deletions cub/agent/agent_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,7 @@ private:
typename BlockSizeClassScanT::TempStorage size_scan_storage;

// Stage 2: Communicate the number ofer buffers requiring block-level collaboration
struct
{
typename BLevBuffScanPrefixCallbackOpT::TempStorage buffer_scan_callback;
};
typename BLevBuffScanPrefixCallbackOpT::TempStorage buffer_scan_callback;

// Stage 3; batch memcpy buffers that require only thread-level collaboration
struct
Expand All @@ -346,16 +343,16 @@ private:
{
typename BLevBlockScanPrefixCallbackOpT::TempStorage block_scan_callback;
typename BlockBLevTileCountScanT::TempStorage block_scan_storage;
};
} blev;

// Stage 3.3:
// Stage 3.3: run-length decode & block exchange for tlev
struct
{
typename BlockRunLengthDecodeT::TempStorage run_length_decode;
typename BlockExchangeTLevT::TempStorage block_exchange_storage;
};
} tlev;
};
};
} staged;
};
BufferOffsetT blev_buffer_offset;
};
Expand Down Expand Up @@ -505,7 +502,8 @@ private:
if (tile_id == 0)
{
BlockOffsetT block_aggregate;
BlockBLevTileCountScanT(temp_storage.block_scan_storage).ExclusiveSum(block_offset, block_offset, block_aggregate);
BlockBLevTileCountScanT(temp_storage.staged.blev.block_scan_storage)
.ExclusiveSum(block_offset, block_offset, block_aggregate);
if (threadIdx.x == 0)
{
blev_block_scan_state.SetInclusive(0, block_aggregate);
Expand All @@ -514,10 +512,10 @@ private:
else
{
BLevBlockScanPrefixCallbackOpT blev_tile_prefix_op(blev_block_scan_state,
temp_storage.block_scan_callback,
temp_storage.staged.blev.block_scan_callback,
Sum(),
tile_id);
BlockBLevTileCountScanT(temp_storage.block_scan_storage)
BlockBLevTileCountScanT(temp_storage.staged.blev.block_scan_storage)
.ExclusiveSum(block_offset, block_offset, blev_tile_prefix_op);
}
CTA_SYNC();
Expand Down Expand Up @@ -658,7 +656,7 @@ private:
// Evenly distribute all the bytes that have to be copied from all the buffers that require thread-level
// collaboration using BlockRunLengthDecode
uint32_t num_total_tlev_bytes = 0U;
BlockRunLengthDecodeT block_run_length_decode(temp_storage.run_length_decode,
BlockRunLengthDecodeT block_run_length_decode(temp_storage.staged.tlev.run_length_decode,
tlev_buffer_ids,
tlev_buffer_sizes,
num_total_tlev_bytes);
Expand All @@ -684,7 +682,7 @@ private:
}

// Exchange from blocked to striped arrangement for coalesced memory reads and writes
BlockExchangeTLevT(temp_storage.block_exchange_storage)
BlockExchangeTLevT(temp_storage.staged.tlev.block_exchange_storage)
.BlockedToStriped(zipped_byte_assignment, zipped_byte_assignment);

// Read in the bytes that this thread is assigned to
Expand Down Expand Up @@ -812,7 +810,7 @@ public:
CTA_SYNC();

// Scatter the buffers into one of the three partitions (TLEV, WLEV, BLEV) depending on their size
PartitionBuffersBySize(buffer_sizes, size_class_histogram, temp_storage.buffers_by_size_class);
PartitionBuffersBySize(buffer_sizes, size_class_histogram, temp_storage.staged.buffers_by_size_class);

// Ensure all buffers have been partitioned by their size class AND
// ensure that blev_buffer_offset has been written to shared memory
Expand All @@ -824,28 +822,31 @@ public:
BufferSizeIteratorT tile_buffer_sizes = buffer_sizes_it + buffer_offset;

// Copy block-level buffers
EnqueueBLEVBuffers(
&temp_storage.buffers_by_size_class[size_class_agg.Get(TLEV_SIZE_CLASS) + size_class_agg.Get(WLEV_SIZE_CLASS)],
tile_buffer_srcs,
tile_buffer_dsts,
tile_buffer_sizes,
size_class_agg.Get(BLEV_SIZE_CLASS),
temp_storage.blev_buffer_offset,
tile_id);
EnqueueBLEVBuffers(&temp_storage.staged.buffers_by_size_class[size_class_agg.Get(TLEV_SIZE_CLASS) +
size_class_agg.Get(WLEV_SIZE_CLASS)],
tile_buffer_srcs,
tile_buffer_dsts,
tile_buffer_sizes,
size_class_agg.Get(BLEV_SIZE_CLASS),
temp_storage.blev_buffer_offset,
tile_id);

// Ensure we can repurpose the temporary storage required by EnqueueBLEVBuffers
CTA_SYNC();

// Copy warp-level buffers
BatchMemcpyWLEVBuffers(&temp_storage.buffers_by_size_class[size_class_agg.Get(TLEV_SIZE_CLASS)],
BatchMemcpyWLEVBuffers(&temp_storage.staged.buffers_by_size_class[size_class_agg.Get(TLEV_SIZE_CLASS)],
tile_buffer_srcs,
tile_buffer_dsts,
tile_buffer_sizes,
size_class_agg.Get(WLEV_SIZE_CLASS));

// Perform batch memcpy for all the buffers that require thread-level collaboration
uint32_t num_tlev_buffers = size_class_agg.Get(TLEV_SIZE_CLASS);
BatchMemcpyTLEVBuffers(temp_storage.buffers_by_size_class, tile_buffer_srcs, tile_buffer_dsts, num_tlev_buffers);
BatchMemcpyTLEVBuffers(temp_storage.staged.buffers_by_size_class,
tile_buffer_srcs,
tile_buffer_dsts,
num_tlev_buffers);
}

//-----------------------------------------------------------------------------
Expand Down

0 comments on commit 4f44fae

Please sign in to comment.