From 1af673aafa7261aa0e9501e54f134cda2a62a13e Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Wed, 12 Jul 2023 10:23:42 +0200 Subject: [PATCH] Implement bind group layout deduplication for all configurations Currently wgpu-core implement bind group layout deduplication only when it creates its own resource IDs. In other words it works for wgpu but not in Firefox. This PR bridges the gap by allowing an optional indirection in bind group layouts: each BGL may store an ID referring to its "deduplicated" BGL. When referring to a BGL the rest of the code must make sure to follow the indirection. The exception is command buffer processing which is considered hot code and where we first validate against the provided BGL ID and only follow the indirection if the initial check failed. The main pain point with this approach is the various places where wgpu-core manually updates reference counts: we have to be careful about following the indirection to track the right BGL. --- wgpu-core/src/binding_model.rs | 32 ++++++++++++++++++ wgpu-core/src/command/bind.rs | 35 +++++++++++++++----- wgpu-core/src/command/compute.rs | 16 ++++++--- wgpu-core/src/command/render.rs | 27 +++++++++++---- wgpu-core/src/device/global.rs | 57 +++++++++++++++++++++++--------- wgpu-core/src/device/life.rs | 26 ++++++++++----- wgpu-core/src/device/resource.rs | 19 ++++++++--- wgpu-core/src/hub.rs | 1 + 8 files changed, 165 insertions(+), 48 deletions(-) diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index a06476a2121..98b09f0fae9 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -436,6 +436,8 @@ pub struct BindGroupLayoutDescriptor<'a> { pub(crate) type BindEntryMap = FastHashMap; +pub type BindGroupLayouts = crate::storage::Storage, BindGroupLayoutId>; + /// Bind group layout. /// /// The lifetime of BGLs is a bit special. They are only referenced on CPU @@ -450,6 +452,12 @@ pub struct BindGroupLayout { pub(crate) device_id: Stored, pub(crate) multi_ref_count: MultiRefCount, pub(crate) entries: BindEntryMap, + // When a layout created and there already exists a compatible layout the new layout + // keeps a reference to the older compatible one. In some places we the substitute + // the bind group layout id with its compatible sibbling. + // Since this substitution can come at a cost, it is skipped when wgpu-core generates + // its own resource IDs. + pub(crate) compatible_layout: Option>, #[allow(unused)] pub(crate) dynamic_count: usize, pub(crate) count_validator: BindingTypeMaxCountValidator, @@ -472,6 +480,30 @@ impl Resource for BindGroupLayout { } } +// If a bindgroup needs to be substitued with its compatible equivalent, return the latter. +pub(crate) fn try_get_bind_group_layout( + layouts: &BindGroupLayouts, + id: BindGroupLayoutId, +) -> Option<&BindGroupLayout> { + let layout = layouts.get(id).ok()?; + if let Some(compat) = layout.compatible_layout { + return Some(&layouts[compat]); + } + + Some(layout) +} + +pub(crate) fn get_bind_group_layout( + layouts: &BindGroupLayouts, + id: Valid, +) -> (Valid, &BindGroupLayout) { + let layout = &layouts[id]; + layout + .compatible_layout + .map(|compat| (compat, &layouts[compat])) + .unwrap_or((id, layout)) +} + #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum CreatePipelineLayoutError { diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs index 5bba3e80ed4..05f90c6bc91 100644 --- a/wgpu-core/src/command/bind.rs +++ b/wgpu-core/src/command/bind.rs @@ -1,5 +1,7 @@ use crate::{ - binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout}, + binding_model::{ + BindGroup, BindGroupLayouts, LateMinBufferBindingSizeMismatch, PipelineLayout, + }, device::SHADER_STAGE_COUNT, hal_api::HalApi, id::{BindGroupId, PipelineLayoutId, Valid}, @@ -13,7 +15,10 @@ use arrayvec::ArrayVec; type BindGroupMask = u8; mod compat { - use crate::id::{BindGroupLayoutId, Valid}; + use crate::{ + binding_model::BindGroupLayouts, + id::{BindGroupLayoutId, Valid}, + }; use std::ops::Range; #[derive(Debug, Default)] @@ -27,8 +32,16 @@ mod compat { self.assigned.is_some() && self.expected.is_some() } - fn is_valid(&self) -> bool { - self.expected.is_none() || self.expected == self.assigned + fn is_valid(&self, bind_group_layouts: &BindGroupLayouts) -> bool { + if self.expected.is_none() || self.expected == self.assigned { + return true; + } + + if let Some(id) = self.assigned { + return bind_group_layouts[id].compatible_layout == self.expected; + } + + false } } @@ -88,9 +101,12 @@ mod compat { .filter_map(|(i, e)| if e.is_active() { Some(i) } else { None }) } - pub fn invalid_mask(&self) -> super::BindGroupMask { + pub fn invalid_mask( + &self, + bind_group_layouts: &BindGroupLayouts, + ) -> super::BindGroupMask { self.entries.iter().enumerate().fold(0, |mask, (i, entry)| { - if entry.is_valid() { + if entry.is_valid(bind_group_layouts) { mask } else { mask | 1u8 << i @@ -276,8 +292,11 @@ impl Binder { .map(move |index| payloads[index].group_id.as_ref().unwrap().value) } - pub(super) fn invalid_mask(&self) -> BindGroupMask { - self.manager.invalid_mask() + pub(super) fn invalid_mask( + &self, + bind_group_layouts: &BindGroupLayouts, + ) -> BindGroupMask { + self.manager.invalid_mask(bind_group_layouts) } /// Scan active buffer bindings corresponding to layouts without `min_binding_size` specified. diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 0a0b4e85e6b..1d79201df1c 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -1,6 +1,7 @@ use crate::{ binding_model::{ - BindError, BindGroup, LateMinBufferBindingSizeMismatch, PushConstantUploadError, + BindError, BindGroup, BindGroupLayouts, LateMinBufferBindingSizeMismatch, + PushConstantUploadError, }, command::{ bind::Binder, @@ -257,8 +258,8 @@ struct State { } impl State { - fn is_ready(&self) -> Result<(), DispatchError> { - let bind_mask = self.binder.invalid_mask(); + fn is_ready(&self, bind_group_layouts: &BindGroupLayouts) -> Result<(), DispatchError> { + let bind_mask = self.binder.invalid_mask(bind_group_layouts); if bind_mask != 0 { //let (expected, provided) = self.binder.entries[index as usize].info(); return Err(DispatchError::IncompatibleBindGroup { @@ -371,6 +372,7 @@ impl Global { let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); let (pipeline_guard, mut token) = hub.compute_pipelines.read(&mut token); let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (bind_group_layout_guard, mut token) = hub.bind_group_layouts.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, _) = hub.textures.read(&mut token); @@ -591,7 +593,9 @@ impl Global { pipeline: state.pipeline, }; - state.is_ready().map_pass_err(scope)?; + state + .is_ready(&*bind_group_layout_guard) + .map_pass_err(scope)?; state .flush_states( raw, @@ -628,7 +632,9 @@ impl Global { pipeline: state.pipeline, }; - state.is_ready().map_pass_err(scope)?; + state + .is_ready(&*bind_group_layout_guard) + .map_pass_err(scope)?; device .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION) diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index e8080abba32..cf835317418 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -1,5 +1,5 @@ use crate::{ - binding_model::BindError, + binding_model::{BindError, BindGroupLayouts}, command::{ self, bind::Binder, @@ -386,7 +386,11 @@ struct State { } impl State { - fn is_ready(&self, indexed: bool) -> Result<(), DrawError> { + fn is_ready( + &self, + indexed: bool, + bind_group_layouts: &BindGroupLayouts, + ) -> Result<(), DrawError> { // Determine how many vertex buffers have already been bound let vertex_buffer_count = self.vertex.inputs.iter().take_while(|v| v.bound).count() as u32; // Compare with the needed quantity @@ -396,7 +400,7 @@ impl State { }); } - let bind_mask = self.binder.invalid_mask(); + let bind_mask = self.binder.invalid_mask(bind_group_layouts); if bind_mask != 0 { //let (expected, provided) = self.binder.entries[index as usize].info(); return Err(DrawError::IncompatibleBindGroup { @@ -1252,6 +1256,7 @@ impl Global { let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); let (render_pipeline_guard, mut token) = hub.render_pipelines.read(&mut token); let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (bind_group_layout_guard, mut token) = hub.bind_group_layouts.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, mut token) = hub.textures.read(&mut token); let (view_guard, _) = hub.texture_views.read(&mut token); @@ -1713,7 +1718,9 @@ impl Global { indirect: false, pipeline: state.pipeline, }; - state.is_ready(indexed).map_pass_err(scope)?; + state + .is_ready::(indexed, &bind_group_layout_guard) + .map_pass_err(scope)?; let last_vertex = first_vertex + vertex_count; let vertex_limit = state.vertex.vertex_limit; @@ -1753,7 +1760,9 @@ impl Global { indirect: false, pipeline: state.pipeline, }; - state.is_ready(indexed).map_pass_err(scope)?; + state + .is_ready::(indexed, &*bind_group_layout_guard) + .map_pass_err(scope)?; //TODO: validate that base_vertex + max_index() is // within the provided range @@ -1798,7 +1807,9 @@ impl Global { indirect: true, pipeline: state.pipeline, }; - state.is_ready(indexed).map_pass_err(scope)?; + state + .is_ready::(indexed, &*bind_group_layout_guard) + .map_pass_err(scope)?; let stride = match indexed { false => mem::size_of::(), @@ -1870,7 +1881,9 @@ impl Global { indirect: true, pipeline: state.pipeline, }; - state.is_ready(indexed).map_pass_err(scope)?; + state + .is_ready::(indexed, &*bind_group_layout_guard) + .map_pass_err(scope)?; let stride = match indexed { false => mem::size_of::(), diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 77a4e4275c7..fb9ab2956de 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1,7 +1,8 @@ #[cfg(feature = "trace")] use crate::device::trace; use crate::{ - binding_model, command, conv, + binding_model, + command, conv, device::{life::WaitIdleError, map_buffer, queue, Device, DeviceError, HostMap}, global::Global, hal_api::HalApi, @@ -1069,19 +1070,30 @@ impl Global { } } - // If there is an equivalent BGL, just bump the refcount and return it. - // This is only applicable for identity filters that are generating new IDs, - // so their inputs are `PhantomData` of size 0. - if mem::size_of::>() == 0 { + let mut compatible_layout = None; + { let (bgl_guard, _) = hub.bind_group_layouts.read(&mut token); if let Some(id) = Device::deduplicate_bind_group_layout(device_id, &entry_map, &*bgl_guard) { - return (id, None); + // If there is an equivalent BGL, just bump the refcount and return it. + // This is only applicable to identity filters that generate their IDs, + // which use PhantomData as their identity. + // In practice this means: + // - wgpu users take this branch and return the existing + // id without using the indirection layer in BindGrouoLayout. + // - Other users like gecko or the replay tool use don't take + // the branch and instead rely on the indirection to use the + // proper bind group layout id. + if std::mem::size_of::>() == 0 { + return (id, None); + } + + compatible_layout = Some(id::Valid(id)); } } - let layout = match device.create_bind_group_layout( + let mut layout = match device.create_bind_group_layout( device_id, desc.label.borrow_option(), entry_map, @@ -1090,7 +1102,10 @@ impl Global { Err(e) => break e, }; + layout.compatible_layout = compatible_layout; + let id = fid.assign(layout, &mut token); + return (id.0, None); }; @@ -1235,16 +1250,28 @@ impl Global { .add(trace::Action::CreateBindGroup(fid.id(), desc.clone())); } - let bind_group_layout = match bind_group_layout_guard.get(desc.layout) { + let mut bind_group_layout = match bind_group_layout_guard.get(desc.layout) { Ok(layout) => layout, - Err(_) => break binding_model::CreateBindGroupError::InvalidLayout, + Err(..) => break binding_model::CreateBindGroupError::InvalidLayout, + }; + + let mut layout_id = id::Valid(desc.layout); + if let Some(id) = bind_group_layout.compatible_layout { + layout_id = id; + bind_group_layout = &bind_group_layout_guard[id]; + } + + let bind_group = match device.create_bind_group( + device_id, + bind_group_layout, + layout_id, + desc, + hub, + &mut token, + ) { + Ok(bind_group) => bind_group, + Err(e) => break e, }; - let bind_group = - match device.create_bind_group(device_id, bind_group_layout, desc, hub, &mut token) - { - Ok(bind_group) => bind_group, - Err(e) => break e, - }; let ref_count = bind_group.life_guard.add_ref(); let id = fid.assign(bind_group, &mut token); diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 75491d10cb9..43966247eda 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -762,14 +762,24 @@ impl LifetimeTracker { //Note: nothing else can bump the refcount since the guard is locked exclusively //Note: same BGL can appear multiple times in the list, but only the last // encounter could drop the refcount to 0. - if guard[id].multi_ref_count.dec_and_check_empty() { - log::debug!("Bind group layout {:?} will be destroyed", id); - #[cfg(feature = "trace")] - if let Some(t) = trace { - t.lock().add(trace::Action::DestroyBindGroupLayout(id.0)); - } - if let Some(lay) = hub.bind_group_layouts.unregister_locked(id.0, &mut *guard) { - self.free_resources.bind_group_layouts.push(lay.raw); + let mut bgl_to_check = Some(id); + while let Some(id) = bgl_to_check.take() { + let bgl = &guard[id]; + if bgl.multi_ref_count.dec_and_check_empty() { + // If This layout points to a compatible one, go over the latter + // to decrement the ref count and potentially destroy it. + bgl_to_check = bgl.compatible_layout; + + log::debug!("Bind group layout {:?} will be destroyed", id); + #[cfg(feature = "trace")] + if let Some(t) = trace { + t.lock().add(trace::Action::DestroyBindGroupLayout(id.0)); + } + if let Some(lay) = + hub.bind_group_layouts.unregister_locked(id.0, &mut *guard) + { + self.free_resources.bind_group_layouts.push(lay.raw); + } } } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 20e057a934a..3fdaa39d56d 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -1,7 +1,8 @@ #[cfg(feature = "trace")] use crate::device::trace; use crate::{ - binding_model, command, conv, + binding_model::{self, get_bind_group_layout, try_get_bind_group_layout}, + command, conv, device::life::WaitIdleError, device::{ AttachmentData, CommandAllocator, MissingDownlevelFlags, MissingFeatures, @@ -1369,6 +1370,11 @@ impl Device { .iter(self_id.backend()) .find(|&(_, bgl)| bgl.device_id.value.0 == self_id && bgl.entries == *entry_map) .map(|(id, value)| { + if let Some(comaptible) = value.compatible_layout { + guard[comaptible].multi_ref_count.inc(); + return comaptible.0; + } + value.multi_ref_count.inc(); id }) @@ -1626,6 +1632,7 @@ impl Device { entries: entry_map, #[cfg(debug_assertions)] label: label.unwrap_or("").to_string(), + compatible_layout: None, }) } @@ -1799,6 +1806,7 @@ impl Device { &self, self_id: id::DeviceId, layout: &binding_model::BindGroupLayout, + layout_id: id::Valid, desc: &binding_model::BindGroupDescriptor, hub: &Hub, token: &mut Token>, @@ -2038,7 +2046,7 @@ impl Device { value: id::Valid(self_id), ref_count: self.life_guard.add_ref(), }, - layout_id: id::Valid(desc.layout), + layout_id, life_guard: LifeGuard::new(desc.label.borrow_or_default()), used, used_buffer_ranges, @@ -2287,7 +2295,7 @@ impl Device { let bgl_vec = desc .bind_group_layouts .iter() - .map(|&id| &bgl_guard.get(id).unwrap().raw) + .map(|&id| &try_get_bind_group_layout(bgl_guard, id).unwrap().raw) .collect::>(); let hal_desc = hal::PipelineLayoutDescriptor { label: desc.label.borrow_option(), @@ -2314,8 +2322,9 @@ impl Device { .iter() .map(|&id| { // manually add a dependency to BGL - bgl_guard.get(id).unwrap().multi_ref_count.inc(); - id::Valid(id) + let (id, layout) = get_bind_group_layout(bgl_guard, id::Valid(id)); + layout.multi_ref_count.inc(); + id }) .collect(), push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(), diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index d3da0be6c21..c0670e085ce 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -243,6 +243,7 @@ impl Access> for RenderBundle {} impl Access> for Root {} impl Access> for Device {} impl Access> for PipelineLayout {} +impl Access> for QuerySet {} impl Access> for Root {} impl Access> for Device {} impl Access> for BindGroupLayout {}