Skip to content

Commit

Permalink
wgpu: Allow unaligned writes to IndexBuffer3D
Browse files Browse the repository at this point in the history
wgpu requires buffer copy sizes and offsets to be 4-byte aligned.
Unfortunately, ActionScript can perform 2-byte aligned uploads
into an IndexBuffer3D.

To support this, we now keep a copy of the IndexBuffer3D on the CPU.
When performing an upload to the buffer, we round the offset down
and the size up to the nearest 4-byte aligned value. The cpu buffer
is used to fill out the write with existing data, so that we don't
corrupt the contents of the GPU buffer.

To avoid introducing a new RefCell, I've changed IndexBuffer3D
to use a `Box` instead of an `Rc` to store the trait object.
This allows us to pass a mutable reference down to the backend.
  • Loading branch information
Aaron1011 committed May 10, 2023
1 parent ea78bf2 commit e488cc9
Show file tree
Hide file tree
Showing 12 changed files with 1,027 additions and 33 deletions.
4 changes: 0 additions & 4 deletions core/src/avm2/globals/flash/display3D/index_buffer_3d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ pub fn upload_from_vector<'gc>(

index_buffer.set_count(count as usize, activation.context.gc_context);

if start_offset != 0 {
panic!("What exactly does start_offset do?");
}

let data: Result<Vec<u16>, _> = vector
.iter()
.map(|val| {
Expand Down
6 changes: 4 additions & 2 deletions core/src/avm2/object/context3d_object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,15 @@ impl<'gc> Context3DObject<'gc> {
start_offset: usize,
activation: &mut Activation<'_, 'gc>,
) {
let mut handle = buffer.handle(activation.context.gc_context);
self.0
.write(activation.context.gc_context)
.render_context
.as_mut()
.unwrap()
.process_command(
Context3DCommand::UploadToIndexBuffer {
buffer: buffer.handle(),
buffer: &mut *handle,
data,
start_offset,
},
Expand Down Expand Up @@ -279,6 +280,7 @@ impl<'gc> Context3DObject<'gc> {
// FIXME - should we error if the number of indices isn't a multiple of 3?
num_triangles = (index_buffer.count() / 3) as i32;
}
let handle = index_buffer.handle(activation.context.gc_context);

self.0
.write(activation.context.gc_context)
Expand All @@ -287,7 +289,7 @@ impl<'gc> Context3DObject<'gc> {
.unwrap()
.process_command(
Context3DCommand::DrawTriangles {
index_buffer: index_buffer.handle(),
index_buffer: &*handle,
first_index: first_index as usize,
num_triangles: num_triangles as isize,
},
Expand Down
9 changes: 4 additions & 5 deletions core/src/avm2/object/index_buffer_3d_object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use crate::avm2::Error;
use gc_arena::{Collect, GcCell, MutationContext};
use ruffle_render::backend::IndexBuffer;
use std::cell::{Ref, RefMut};
use std::rc::Rc;

use super::Context3DObject;

Expand All @@ -20,7 +19,7 @@ impl<'gc> IndexBuffer3DObject<'gc> {
pub fn from_handle(
activation: &mut Activation<'_, 'gc>,
context3d: Context3DObject<'gc>,
handle: Rc<dyn IndexBuffer>,
handle: Box<dyn IndexBuffer>,
) -> Result<Object<'gc>, Error<'gc>> {
let class = activation.avm2().classes().indexbuffer3d;
let base = ScriptObjectData::new(class);
Expand Down Expand Up @@ -50,8 +49,8 @@ impl<'gc> IndexBuffer3DObject<'gc> {
self.0.write(mc).count = val;
}

pub fn handle(&self) -> Rc<dyn IndexBuffer> {
self.0.read().handle.clone()
pub fn handle(&self, mc: MutationContext<'gc, '_>) -> RefMut<'_, dyn IndexBuffer> {
RefMut::map(self.0.write(mc), |data| &mut *data.handle)
}

pub fn context3d(&self) -> Context3DObject<'gc> {
Expand All @@ -65,7 +64,7 @@ pub struct IndexBuffer3DObjectData<'gc> {
/// Base script object
base: ScriptObjectData<'gc>,

handle: Rc<dyn IndexBuffer>,
handle: Box<dyn IndexBuffer>,

count: usize,

Expand Down
11 changes: 6 additions & 5 deletions render/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ pub trait Context3D: Collect + Downcast {
// objects after dispose() has been called.
fn disposed_vertex_buffer_handle(&self) -> Rc<dyn VertexBuffer>;

fn create_index_buffer(&mut self, usage: BufferUsage, num_indices: u32) -> Rc<dyn IndexBuffer>;
fn create_index_buffer(&mut self, usage: BufferUsage, num_indices: u32)
-> Box<dyn IndexBuffer>;
fn create_vertex_buffer(
&mut self,
usage: BufferUsage,
Expand All @@ -218,7 +219,7 @@ pub trait Context3D: Collect + Downcast {

fn process_command<'gc>(
&mut self,
command: Context3DCommand<'gc>,
command: Context3DCommand<'_, 'gc>,
mc: MutationContext<'gc, '_>,
);
}
Expand Down Expand Up @@ -338,7 +339,7 @@ impl Context3DTextureFilter {

#[derive(Collect)]
#[collect(no_drop)]
pub enum Context3DCommand<'gc> {
pub enum Context3DCommand<'a, 'gc> {
Clear {
red: f64,
green: f64,
Expand All @@ -365,7 +366,7 @@ pub enum Context3DCommand<'gc> {
SetRenderToBackBuffer,

UploadToIndexBuffer {
buffer: Rc<dyn IndexBuffer>,
buffer: &'a mut dyn IndexBuffer,
start_offset: usize,
data: Vec<u8>,
},
Expand All @@ -378,7 +379,7 @@ pub enum Context3DCommand<'gc> {
},

DrawTriangles {
index_buffer: Rc<dyn IndexBuffer>,
index_buffer: &'a dyn IndexBuffer,
first_index: usize,
num_triangles: isize,
},
Expand Down
73 changes: 56 additions & 17 deletions render/wgpu/src/context3d/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,12 @@ impl WgpuContext3D {

#[derive(Collect)]
#[collect(require_static)]
pub struct IndexBufferWrapper(wgpu::Buffer);
pub struct IndexBufferWrapper {
pub buffer: wgpu::Buffer,
/// A cpu-side copy of the buffer data. This is used to allow us to
/// perform unaligned writes to the GPU buffer, which is required by ActionScript.
pub data: Vec<u8>,
}

#[derive(Collect, Debug)]
#[collect(require_static)]
Expand Down Expand Up @@ -347,14 +352,18 @@ impl Context3D for WgpuContext3D {
&mut self,
_ruffle_usage: ruffle_render::backend::BufferUsage,
num_indices: u32,
) -> Rc<dyn IndexBuffer> {
) -> Box<dyn IndexBuffer> {
let size = align_copy_buffer_size(num_indices as usize * std::mem::size_of::<u16>()) as u32;
let buffer = self.descriptors.device.create_buffer(&BufferDescriptor {
label: None,
size: num_indices as u64 * 2,
size: size as u64,
usage: BufferUsages::INDEX | BufferUsages::COPY_DST,
mapped_at_creation: false,
});
Rc::new(IndexBufferWrapper(buffer))
Box::new(IndexBufferWrapper {
buffer,
data: vec![0; size as usize],
})
}

fn create_vertex_buffer(
Expand Down Expand Up @@ -459,7 +468,7 @@ impl Context3D for WgpuContext3D {

fn process_command<'gc>(
&mut self,
command: Context3DCommand<'gc>,
command: Context3DCommand<'_, 'gc>,
mc: MutationContext<'gc, '_>,
) {
match command {
Expand Down Expand Up @@ -645,20 +654,40 @@ impl Context3D for WgpuContext3D {
start_offset,
data,
} => {
let buffer: &IndexBufferWrapper = buffer
.as_any()
.downcast_ref::<IndexBufferWrapper>()
if data.is_empty() {
return;
}
let buffer: &mut IndexBufferWrapper = buffer
.as_any_mut()
.downcast_mut::<IndexBufferWrapper>()
.unwrap();

// Unfortunately, ActionScript works with 2-byte indices, while wgpu requires
// copy offsets and sizes to have 4-byte alignment. To support this, we need
// to keep a copy of the data on the CPU side. We round *down* the offset to
// the closest multiple of 4 bytes, and round *up* the length to the closest
// multiple of 4 bytes. We then perform a copy from our CPU-side buffer, which
// which uses the existing data (at the beiginning or end) to fill out the copy
// to the required length and offset. Without this, we would lose data in the CPU
// buffer whenever we performed a copy with an unalignd offset or length.
let offset_bytes = start_offset * std::mem::size_of::<u16>();
let rounded_down_offset =
offset_bytes - (offset_bytes % COPY_BUFFER_ALIGNMENT as usize);
let rounded_up_length = align_copy_buffer_size(data.len());

buffer.data[offset_bytes..(offset_bytes + data.len())].copy_from_slice(&data);
self.buffer_staging_belt
.write_buffer(
&mut self.buffer_command_encoder,
&buffer.0,
(start_offset * std::mem::size_of::<u16>()) as u64,
NonZeroU64::new(data.len() as u64).unwrap(),
&buffer.buffer,
rounded_down_offset as u64,
NonZeroU64::new(rounded_up_length as u64).unwrap(),
&self.descriptors.device,
)
.copy_from_slice(&data);
.copy_from_slice(
&buffer.data
[rounded_down_offset..(rounded_down_offset + rounded_up_length)],
);
}

Context3DCommand::UploadToVertexBuffer {
Expand All @@ -667,21 +696,24 @@ impl Context3D for WgpuContext3D {
data32_per_vertex,
data,
} => {
if data.is_empty() {
return;
}

let buffer: Rc<VertexBufferWrapper> = buffer
.clone()
.into_any_rc()
.downcast::<VertexBufferWrapper>()
.unwrap();

let align = COPY_BUFFER_ALIGNMENT as usize;
let rounded_size = (data.len() + align - 1) & !(align - 1);

// ActionScript can only work with 32-bit chunks of data, so our `write_buffer`
// offset and size will always be a multiple of `COPY_BUFFER_ALIGNMENT` (4 bytes)
self.buffer_staging_belt.write_buffer(
&mut self.buffer_command_encoder,
&buffer.buffer,
(start_vertex * (data32_per_vertex as usize) * std::mem::size_of::<f32>())
as u64,
NonZeroU64::new(rounded_size as u64).unwrap(),
NonZeroU64::new(data.len() as u64).unwrap(),
&self.descriptors.device,
)[..data.len()]
.copy_from_slice(&data);
Expand Down Expand Up @@ -805,7 +837,8 @@ impl Context3D for WgpuContext3D {

let mut render_pass = self.make_render_pass(&mut render_command_encoder);

render_pass.set_index_buffer(index_buffer.0.slice(..), wgpu::IndexFormat::Uint16);
render_pass
.set_index_buffer(index_buffer.buffer.slice(..), wgpu::IndexFormat::Uint16);
render_pass.draw_indexed(indices, 0, 0..1);

// A `RenderPass` needs to hold references to several fields in `self`, so we can't
Expand Down Expand Up @@ -1126,3 +1159,9 @@ fn convert_texture_format(input: Context3DTextureFormat) -> Result<wgpu::Texture
)),
}
}

// Rounds up 'len' to the nearest multiple of COPY_BUFFER_ALIGNMENT
fn align_copy_buffer_size(len: usize) -> usize {
let align = COPY_BUFFER_ALIGNMENT as usize;
(len + align - 1) & !(align - 1)
}
Loading

0 comments on commit e488cc9

Please sign in to comment.