Expose wgpu features. Use Queue::write_texture in constructors.
Adds a collection of features to `nannou_wgpu` that match those exposed
by `wgpu` itself. The `spirv` feature in particular will be useful for
users still using `.spv` shaders (as opposed to the new default `.wgsl`
format).

Rewrites the `Texture` short-hand constructors to use
`Queue::write_texture` rather than using copy commands and submitting
command buffers directly. This is to avoid an Intel driver bug where
submitting more than one command buffer per frame appears to be causing
issues:

https://github.com/gfx-rs/wgpu/issues/1672#issuecomment-917510810

While this likely means consuming more RAM, it also likely results in
slightly better performance due to reducing the number of command
buffers submitted.
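
For illustration, a minimal before/after sketch of the change in approach
(names such as `copy_view`, `data`, `layout` and `size` are assumed
bindings, not the crate's exact code):

    // Before: encode copy commands and submit a dedicated command buffer.
    let desc = wgpu::CommandEncoderDescriptor { label: None };
    let mut encoder = device.create_command_encoder(&desc);
    // ...copy commands recorded here...
    queue.submit(std::iter::once(encoder.finish()));

    // After: enqueue the write directly; it is scheduled to occur at the
    // start of the next call to `Queue::submit`.
    queue.write_texture(copy_view, data, layout, size);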
mitchmindtree committed Sep 23, 2021
1 parent a1dc2fe commit 81342b6
Showing 4 changed files with 158 additions and 28 deletions.
nannou_wgpu/Cargo.toml (6 additions, 1 deletion)
@@ -18,6 +18,11 @@ wgpu_upstream = { version = "0.10", package = "wgpu" }

[features]
capturer = ["image", "instant", "num_cpus"]
replay = ["wgpu_upstream/replay"]
serde = ["wgpu_upstream/serde"]
spirv = ["wgpu_upstream/spirv"]
trace = ["wgpu_upstream/trace"]
webgl = ["wgpu_upstream/webgl"]

[package.metadata.docs.rs]
features = ["image", "capturer"]
features = ["capturer", "image", "replay", "serde", "spirv", "trace", "webgl"]
nannou_wgpu/src/lib.rs (23 additions, 0 deletions)
@@ -106,6 +106,20 @@ pub const DEFAULT_POWER_PREFERENCE: PowerPreference = PowerPreference::HighPerformance;
/// Nannou's default WGPU backend preferences.
pub const DEFAULT_BACKENDS: Backends = Backends::PRIMARY;

/// Create a wgpu shader module from the given slice of SPIR-V bytes.
#[cfg(feature = "spirv")]
pub fn shader_from_spirv_bytes(
    device: &wgpu_upstream::Device,
    bytes: &[u8],
) -> wgpu_upstream::ShaderModule {
    let source = util::make_spirv(bytes);
    let desc = ShaderModuleDescriptor {
        label: Some("nannou_shader_module"),
        source,
    };
    device.create_shader_module(&desc)
}
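
// Illustrative usage sketch (the shader path below is hypothetical, and
// `device` is assumed to be a `wgpu_upstream::Device` in scope):
//
//     let bytes = include_bytes!("shaders/frag.spv");
//     let module = shader_from_spirv_bytes(&device, bytes);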

/// Adds a simple render pass command to the given encoder that clears the given texture
/// with the given colour.
///
@@ -181,6 +195,15 @@ pub fn sampler_filtering(desc: &SamplerDescriptor) -> bool {
    }
}

/// Given the initial number of bytes per row within an image, compute the number of bytes that
/// must be added per row to produce a valid bytes per row alignment.
///
/// See here:
/// https://docs.rs/wgpu/latest/wgpu/struct.ImageDataLayout.html#structfield.bytes_per_row
pub fn compute_row_padding(bytes_per_row: u32) -> u32 {
    COPY_BYTES_PER_ROW_ALIGNMENT - (bytes_per_row % COPY_BYTES_PER_ROW_ALIGNMENT)
}
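
// Worked example, assuming wgpu's `COPY_BYTES_PER_ROW_ALIGNMENT` of 256 bytes:
// a 300-texel-wide RGBA8 row occupies 1200 bytes, so 80 padding bytes are
// needed to reach the next 256-byte boundary (1280). Note that a row that is
// already aligned yields a full alignment's worth of padding (256) rather
// than 0.
//
//     assert_eq!(compute_row_padding(1200), 80);
//     assert_eq!(compute_row_padding(1024), 256);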

/// The functions within this module use unsafe in order to retrieve their input as a slice of
/// bytes. This is necessary in order to upload data to the GPU via the wgpu
/// `DeviceExt::create_buffer_init` buffer constructor. This method is unsafe as the type `T` may contain
nannou_wgpu/src/texture/image.rs (128 additions, 21 deletions)
@@ -93,7 +93,7 @@ impl wgpu::Texture {
///
/// If the `&App` is passed as the `src`, the window returned via `app.main_window()` will be
/// used as the source of the device and queue.
-    pub fn from_path<'a, T, P>(src: T, path: P) -> image::ImageResult<Self>
+    pub fn from_path<T, P>(src: T, path: P) -> image::ImageResult<Self>
    where
        T: WithDeviceQueuePair,
        P: AsRef<Path>,
@@ -123,7 +123,7 @@ impl wgpu::Texture {
///
/// The `DeviceQueuePairSource` can be either the `App`, a `Window`, a `DeviceQueuePair` or a
/// tuple `(&Device, &Queue)`.
-    pub fn from_image<'a, T>(src: T, image: &image::DynamicImage) -> Self
+    pub fn from_image<T>(src: T, image: &image::DynamicImage) -> Self
    where
        T: WithDeviceQueuePair,
    {
@@ -498,6 +498,10 @@ where

/// Load a texture directly from a dynamic image.
///
/// This uses the `Queue::write_texture` method, meaning that the texture is not immediately
/// written. Rather, the write is enqueued internally and scheduled to happen at the start of the
/// next call to `Queue::submit`.
///
/// If the image is already in a format supported by wgpu, no conversions are performed and the
/// image is loaded directly as-is with a texture format that matches the original image color
/// type.
@@ -510,17 +514,37 @@ pub fn load_texture_from_image(
    usage: wgpu::TextureUsages,
    image: &image::DynamicImage,
) -> wgpu::Texture {
-    let cmd_encoder_desc = wgpu::CommandEncoderDescriptor {
-        label: Some("nannou_texture_from_image"),
-    };
-    let mut encoder = device.create_command_encoder(&cmd_encoder_desc);
-    let texture = encode_load_texture_from_image(device, &mut encoder, usage, image);
-    queue.submit(std::iter::once(encoder.finish()));
-    texture
    use image::DynamicImage::*;
    match image {
        ImageLuma8(img) => load_texture_from_image_buffer(device, queue, usage, img),
        ImageLumaA8(img) => load_texture_from_image_buffer(device, queue, usage, img),
        ImageRgba8(img) => load_texture_from_image_buffer(device, queue, usage, img),
        ImageBgra8(img) => load_texture_from_image_buffer(device, queue, usage, img),
        ImageLuma16(img) => load_texture_from_image_buffer(device, queue, usage, img),
        ImageLumaA16(img) => load_texture_from_image_buffer(device, queue, usage, img),
        ImageRgba16(img) => load_texture_from_image_buffer(device, queue, usage, img),
        ImageRgb8(_img) => {
            let img = image.to_rgba8();
            load_texture_from_image_buffer(device, queue, usage, &img)
        }
        ImageBgr8(_img) => {
            let img = image.to_bgra8();
            load_texture_from_image_buffer(device, queue, usage, &img)
        }
        ImageRgb16(_img) => {
            // TODO: We likely lose some quality here, e.g. 16-bit channels narrowed to 8-bit.
            let img = image.to_rgba8();
            load_texture_from_image_buffer(device, queue, usage, &img)
        }
    }
}
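
// Illustrative usage sketch (the image path is hypothetical; `device`,
// `queue` and `usage` are assumed to be in scope):
//
//     let image = image::open("assets/photo.png").expect("failed to open image");
//     let texture = load_texture_from_image(&device, &queue, usage, &image);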

/// Load a texture directly from an image buffer using the given device queue.
///
/// This uses the `Queue::write_texture` method, meaning that the texture is not immediately
/// written. Rather, the write is enqueued internally and scheduled to happen at the start of the
/// next call to `Queue::submit`.
///
/// No format or size conversions are performed - the given buffer is loaded directly into GPU
/// memory.
///
@@ -535,12 +559,38 @@ where
    P: 'static + Pixel,
    Container: std::ops::Deref<Target = [P::Subpixel]>,
{
-    let cmd_encoder_desc = wgpu::CommandEncoderDescriptor {
-        label: Some("nannou_texture_from_image_buffer"),
    // Create the texture.
    let texture = wgpu::TextureBuilder::from_image_view(buffer)
        .usage(wgpu::TextureBuilder::REQUIRED_IMAGE_TEXTURE_USAGE | usage)
        .build(device);

    // Describe the layout of the data.
    let extent = texture.extent();
    let format = texture.format();
    let block_size = format.describe().block_size;
    let bytes_per_row = extent.width * block_size as u32;
    let image_data_layout = wgpu::ImageDataLayout {
        offset: 0,
        bytes_per_row: std::num::NonZeroU32::new(bytes_per_row),
        rows_per_image: None,
    };

    // Copy into the entire texture.
    let image_copy_texture = wgpu::ImageCopyTexture {
        texture: texture.inner(),
        mip_level: 0,
        origin: wgpu::Origin3d::ZERO,
        // TODO: Maybe we shouldn't assume this?
        aspect: wgpu::TextureAspect::All,
    };
-    let mut encoder = device.create_command_encoder(&cmd_encoder_desc);
-    let texture = encode_load_texture_from_image_buffer(device, &mut encoder, usage, buffer);
-    queue.submit(std::iter::once(encoder.finish()));

    // TODO:
    // This can theoretically be exploited by implementing our `image::Pixel` trait for some
    // type that has padding. Perhaps it should be an unsafe trait? Should investigate how to
    // achieve this in a safer manner.
    let data = unsafe { wgpu::bytes::from_slice(&*buffer) };

    queue.write_texture(image_copy_texture, data, image_data_layout, extent);
    texture
}
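
// Design note: unlike buffer-to-texture copy commands, `Queue::write_texture`
// does not require `bytes_per_row` to be aligned to
// `COPY_BYTES_PER_ROW_ALIGNMENT`, which is why the tightly-packed row size can
// be passed directly above without padding.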

@@ -564,14 +614,71 @@ where
    P: 'static + Pixel,
    Container: 'a + std::ops::Deref<Target = [P::Subpixel]>,
{
-    let cmd_encoder_desc = wgpu::CommandEncoderDescriptor {
-        label: Some("nannou_load_3d_texture_from_image_buffers"),
    let mut buffers = buffers.into_iter();
    let array_layers = buffers.len() as u32;
    let first_buffer = buffers.next()?;

    // Build the texture ready to receive the data.
    let (width, height) = first_buffer.dimensions();
    let extent = wgpu::Extent3d {
        width,
        height,
        depth_or_array_layers: array_layers,
    };
-    let mut encoder = device.create_command_encoder(&cmd_encoder_desc);
-    let texture =
-        encode_load_texture_array_from_image_buffers(device, &mut encoder, usage, buffers);
-    queue.submit(std::iter::once(encoder.finish()));
-    texture
    let texture = wgpu::TextureBuilder::from_image_view(first_buffer)
        .extent(extent)
        .dimension(wgpu::TextureDimension::D2) // force an array
        .usage(wgpu::TextureBuilder::REQUIRED_IMAGE_TEXTURE_USAGE | usage)
        .build(device);

    // Describe the layout of the data.
    let format = texture.format();
    let block_size = format.describe().block_size;
    let bytes_per_row = extent.width * block_size as u32;
    let padding = wgpu::compute_row_padding(bytes_per_row);
    let padded_bytes_per_row = bytes_per_row + padding;
    let image_data_layout = wgpu::ImageDataLayout {
        offset: 0,
        bytes_per_row: std::num::NonZeroU32::new(padded_bytes_per_row),
        rows_per_image: std::num::NonZeroU32::new(height),
    };

    // Collect the data into a single slice, padding each row as necessary.
    //
    // NOTE: Previously we used `encode_load_texture_array_from_image_buffers`, which avoids
    // collecting the image data into a single slice. However, the `wgpu::Texture::from_*`
    // constructors have been changed to avoid submitting an extra command buffer in favour of
    // using `Queue::write_texture`, which schedules the write for the next call to
    // `Queue::submit`. This is to avoid an Intel driver bug where submitting more than one
    // command buffer per frame appears to be causing issues:
    // https://github.com/gfx-rs/wgpu/issues/1672#issuecomment-917510810
    //
    // While this likely means consuming more RAM, it also likely results in slightly better
    // performance due to reducing the number of command buffers submitted.
    //
    // Users can still use `encode_load_texture_array_from_image_buffers` directly if they wish.
    //
    // Rows are stored padded, so reserve the padded row size for every row of every layer.
    let capacity = padded_bytes_per_row as usize * height as usize * array_layers as usize;
    let mut data: Vec<u8> = Vec::with_capacity(capacity);
    for buffer in Some(first_buffer).into_iter().chain(buffers) {
        let layer_data = unsafe { wgpu::bytes::from_slice(&*buffer) };
        for row_data in layer_data.chunks(bytes_per_row as usize) {
            data.extend_from_slice(row_data);
            data.extend((0..padding).map(|_| 0u8));
        }
    }

    // Copy into the entire texture.
    let image_copy_texture = wgpu::ImageCopyTexture {
        texture: texture.inner(),
        mip_level: 0,
        origin: wgpu::Origin3d::ZERO,
        // TODO: Maybe we shouldn't assume this?
        aspect: wgpu::TextureAspect::All,
    };

    queue.write_texture(image_copy_texture, &data, image_data_layout, extent);

    Some(texture)
}
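
// Illustrative usage sketch (the enclosing function name and bindings are
// assumed, not shown in this hunk; `frames` is a non-empty
// `Vec<image::RgbaImage>` of equal dimensions, and the iterator must report
// an exact length for the `buffers.len()` call above):
//
//     let texture = load_texture_array_from_image_buffers(
//         &device,
//         &queue,
//         wgpu::TextureUsages::TEXTURE_BINDING,
//         frames.iter(),
//     )
//     .expect("requires at least one image buffer");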

/// Encode the necessary commands to load a texture directly from a dynamic image.
nannou_wgpu/src/texture/row_padded_buffer.rs (1 addition, 6 deletions)
@@ -30,7 +30,7 @@ impl RowPaddedBuffer {
///
/// Width should be given in bytes.
    pub fn new(device: &wgpu::Device, width: u32, height: u32, usage: wgpu::BufferUsages) -> Self {
-        let row_padding = Self::compute_row_padding(width);
+        let row_padding = wgpu::compute_row_padding(width);

        // Only create mapped for buffers that we're going to write to.
        let mapped_at_creation = usage.contains(wgpu::BufferUsages::MAP_WRITE);
@@ -66,11 +66,6 @@ impl RowPaddedBuffer {
)
}

-    /// Compute the necessary padding for each row.
-    fn compute_row_padding(width: u32) -> u32 {
-        wgpu::COPY_BYTES_PER_ROW_ALIGNMENT - (width % wgpu::COPY_BYTES_PER_ROW_ALIGNMENT)
-    }

    /// The width of the buffer, in bytes, NOT including padding bytes.
    pub fn width(&self) -> u32 {
        self.width
