Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wgpu-core] fix length of copy in queue_write_texture #2 #6009

Merged
Merged 1 commit on Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 43 additions & 2 deletions tests/tests/write_texture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static WRITE_TEXTURE_SUBSET_2D: GpuTestConfiguration =
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
bytemuck::cast_slice(&data),
&data,
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: Some(size),
Expand Down Expand Up @@ -127,7 +127,7 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration =
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
bytemuck::cast_slice(&data),
&data,
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: Some(size),
Expand Down Expand Up @@ -191,3 +191,44 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration =
assert_eq!(*byte, 0);
}
});

#[gpu_test]
static WRITE_TEXTURE_NO_OOB: GpuTestConfiguration =
    GpuTestConfiguration::new().run_async(|ctx| async move {
        let size = 256;

        // A single-layer 2D R8Uint texture used purely as a copy destination.
        let descriptor = wgpu::TextureDescriptor {
            label: None,
            size: wgpu::Extent3d {
                width: size,
                height: size,
                depth_or_array_layers: 1,
            },
            mip_level_count: 1,
            sample_count: 1,
            dimension: wgpu::TextureDimension::D2,
            format: wgpu::TextureFormat::R8Uint,
            usage: wgpu::TextureUsages::COPY_DST,
            view_formats: &[],
        };
        let texture = ctx.device.create_texture(&descriptor);

        // The copy below only needs two rows (2 * size bytes). The extra
        // 100 bytes are intentional: if the implementation sized its
        // internal staging copy from the source slice length instead of
        // the required copy length, it would read out of bounds — this
        // test ensures it does not.
        let payload = vec![1u8; size as usize * 2 + 100];

        ctx.queue.write_texture(
            wgpu::ImageCopyTexture {
                texture: &texture,
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            &payload,
            wgpu::ImageDataLayout {
                offset: 0,
                bytes_per_row: Some(size),
                rows_per_image: Some(size),
            },
            wgpu::Extent3d {
                width: size,
                height: 2,
                depth_or_array_layers: 1,
            },
        );
    });
14 changes: 7 additions & 7 deletions wgpu-core/src/command/transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ pub(crate) fn validate_linear_texture_data(
// the copy size before calling this function (for example via `validate_texture_copy_range`).
let copy_width = copy_size.width as BufferAddress;
let copy_height = copy_size.height as BufferAddress;
let copy_depth = copy_size.depth_or_array_layers as BufferAddress;
let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress;

let offset = layout.offset;

Expand Down Expand Up @@ -253,19 +253,19 @@ pub(crate) fn validate_linear_texture_data(
}
bytes_per_row
} else {
if copy_depth > 1 || height_in_blocks > 1 {
if depth_or_array_layers > 1 || height_in_blocks > 1 {
return Err(TransferError::UnspecifiedBytesPerRow);
}
0
};
let block_rows_per_image = if let Some(rows_per_image) = layout.rows_per_image {
let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image {
let rows_per_image = rows_per_image as BufferAddress;
if rows_per_image < height_in_blocks {
return Err(TransferError::InvalidRowsPerImage);
}
rows_per_image
} else {
if copy_depth > 1 {
if depth_or_array_layers > 1 {
return Err(TransferError::UnspecifiedRowsPerImage);
}
0
Expand All @@ -287,12 +287,12 @@ pub(crate) fn validate_linear_texture_data(
}
}

let bytes_per_image = bytes_per_row * block_rows_per_image;
let bytes_per_image = bytes_per_row * rows_per_image;

let required_bytes_in_copy = if copy_depth == 0 {
let required_bytes_in_copy = if depth_or_array_layers == 0 {
0
} else {
let mut required_bytes_in_copy = bytes_per_image * (copy_depth - 1);
let mut required_bytes_in_copy = bytes_per_image * (depth_or_array_layers - 1);
if height_in_blocks > 0 {
required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row;
}
Expand Down
87 changes: 38 additions & 49 deletions wgpu-core/src/device/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ impl Global {

// Note: `_source_bytes_per_array_layer` is ignored since we
// have a staging copy, and it can have a different value.
let (_, _source_bytes_per_array_layer) = validate_linear_texture_data(
let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data(
data_layout,
dst.desc.format,
destination.aspect,
Expand All @@ -682,32 +682,6 @@ impl Global {
.map_err(TransferError::from)?;
}

let (block_width, block_height) = dst.desc.format.block_dimensions();
let width_blocks = size.width / block_width;
let height_blocks = size.height / block_height;

let block_rows_per_image = data_layout.rows_per_image.unwrap_or(
// doesn't really matter because we need this only if we copy
// more than one layer, and then we validate for this being not
// None
height_blocks,
);

let block_size = dst
.desc
.format
.block_copy_size(Some(destination.aspect))
.unwrap();
let bytes_per_row_alignment =
get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size);
let stage_bytes_per_row =
wgt::math::align_to(block_size * width_blocks, bytes_per_row_alignment);

let block_rows_in_copy =
(size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks;
let stage_size =
wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64).unwrap();

let mut pending_writes = device.pending_writes.lock();
let encoder = pending_writes.activate();

Expand Down Expand Up @@ -763,33 +737,47 @@ impl Global {

let dst_raw = dst.try_raw(&snatch_guard)?;

let bytes_per_row = data_layout
.bytes_per_row
.unwrap_or(width_blocks * block_size);
let (block_width, block_height) = dst.desc.format.block_dimensions();
let width_in_blocks = size.width / block_width;
let height_in_blocks = size.height / block_height;

let block_size = dst
.desc
.format
.block_copy_size(Some(destination.aspect))
.unwrap();
let bytes_in_last_row = width_in_blocks * block_size;

let bytes_per_row = data_layout.bytes_per_row.unwrap_or(bytes_in_last_row);
let rows_per_image = data_layout.rows_per_image.unwrap_or(height_in_blocks);

let bytes_per_row_alignment =
get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size);
let stage_bytes_per_row = wgt::math::align_to(bytes_in_last_row, bytes_per_row_alignment);

// Platform validation requires that the staging buffer always be
// freed, even if an error occurs. All paths from here must call
// `device.pending_writes.consume`.
let mut staging_buffer = StagingBuffer::new(device, stage_size)?;

if stage_bytes_per_row == bytes_per_row {
let staging_buffer = if stage_bytes_per_row == bytes_per_row {
profiling::scope!("copy aligned");
// Fast path if the data is already being aligned optimally.
unsafe {
staging_buffer.write_with_offset(
data,
data_layout.offset as isize,
0,
(data.len() as u64 - data_layout.offset) as usize,
);
}
let stage_size = wgt::BufferSize::new(required_bytes_in_copy).unwrap();
let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
staging_buffer.write(&data[data_layout.offset as usize..]);
staging_buffer
} else {
profiling::scope!("copy chunked");
// Copy row by row into the optimal alignment.
let block_rows_in_copy =
(size.depth_or_array_layers - 1) * rows_per_image + height_in_blocks;
let stage_size =
wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64)
.unwrap();
let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
for layer in 0..size.depth_or_array_layers {
let rows_offset = layer * block_rows_per_image;
for row in rows_offset..rows_offset + height_blocks {
let rows_offset = layer * rows_per_image;
for row in rows_offset..rows_offset + height_in_blocks {
let src_offset = data_layout.offset as u32 + row * bytes_per_row;
let dst_offset = row * stage_bytes_per_row;
unsafe {
Expand All @@ -802,20 +790,21 @@ impl Global {
}
}
}
}
staging_buffer
};

let staging_buffer = staging_buffer.flush();

let regions = (0..array_layer_count).map(|rel_array_layer| {
let regions = (0..array_layer_count).map(|array_layer_offset| {
let mut texture_base = dst_base.clone();
texture_base.array_layer += rel_array_layer;
texture_base.array_layer += array_layer_offset;
hal::BufferTextureCopy {
buffer_layout: wgt::ImageDataLayout {
offset: rel_array_layer as u64
* block_rows_per_image as u64
offset: array_layer_offset as u64
* rows_per_image as u64
* stage_bytes_per_row as u64,
bytes_per_row: Some(stage_bytes_per_row),
rows_per_image: Some(block_rows_per_image),
rows_per_image: Some(rows_per_image),
},
texture_base,
size: hal_copy_size,
Expand Down
Loading