diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index a06476a2121..98b09f0fae9 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -436,6 +436,8 @@ pub struct BindGroupLayoutDescriptor<'a> {
 
 pub(crate) type BindEntryMap = FastHashMap<u32, wgt::BindGroupLayoutEntry>;
 
+pub type BindGroupLayouts<A> = crate::storage::Storage<BindGroupLayout<A>, BindGroupLayoutId>;
+
 /// Bind group layout.
 ///
 /// The lifetime of BGLs is a bit special. They are only referenced on CPU
@@ -450,6 +452,12 @@ pub struct BindGroupLayout<A: hal::Api> {
     pub(crate) device_id: Stored<DeviceId>,
     pub(crate) multi_ref_count: MultiRefCount,
     pub(crate) entries: BindEntryMap,
+    // When a layout created and there already exists a compatible layout the new layout
+    // keeps a reference to the older compatible one. In some places we the substitute
+    // the bind group layout id with its compatible sibbling.
+    // Since this substitution can come at a cost, it is skipped when wgpu-core generates
+    // its own resource IDs.
+    pub(crate) compatible_layout: Option<Valid<BindGroupLayoutId>>,
     #[allow(unused)]
     pub(crate) dynamic_count: usize,
     pub(crate) count_validator: BindingTypeMaxCountValidator,
@@ -472,6 +480,30 @@ impl<A: hal::Api> Resource for BindGroupLayout<A> {
     }
 }
 
+// If a bindgroup needs to be substitued with its compatible equivalent, return the latter.
+pub(crate) fn try_get_bind_group_layout<A: HalApi>(
+    layouts: &BindGroupLayouts<A>,
+    id: BindGroupLayoutId,
+) -> Option<&BindGroupLayout<A>> {
+    let layout = layouts.get(id).ok()?;
+    if let Some(compat) = layout.compatible_layout {
+        return Some(&layouts[compat]);
+    }
+
+    Some(layout)
+}
+
+pub(crate) fn get_bind_group_layout<A: HalApi>(
+    layouts: &BindGroupLayouts<A>,
+    id: Valid<BindGroupLayoutId>,
+) -> (Valid<BindGroupLayoutId>, &BindGroupLayout<A>) {
+    let layout = &layouts[id];
+    layout
+        .compatible_layout
+        .map(|compat| (compat, &layouts[compat]))
+        .unwrap_or((id, layout))
+}
+
 #[derive(Clone, Debug, Error)]
 #[non_exhaustive]
 pub enum CreatePipelineLayoutError {
diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index 5bba3e80ed4..05f90c6bc91 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -1,5 +1,7 @@
 use crate::{
-    binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout},
+    binding_model::{
+        BindGroup, BindGroupLayouts, LateMinBufferBindingSizeMismatch, PipelineLayout,
+    },
     device::SHADER_STAGE_COUNT,
     hal_api::HalApi,
     id::{BindGroupId, PipelineLayoutId, Valid},
@@ -13,7 +15,10 @@ use arrayvec::ArrayVec;
 type BindGroupMask = u8;
 
 mod compat {
-    use crate::id::{BindGroupLayoutId, Valid};
+    use crate::{
+        binding_model::BindGroupLayouts,
+        id::{BindGroupLayoutId, Valid},
+    };
     use std::ops::Range;
 
     #[derive(Debug, Default)]
@@ -27,8 +32,16 @@ mod compat {
             self.assigned.is_some() && self.expected.is_some()
         }
 
-        fn is_valid(&self) -> bool {
-            self.expected.is_none() || self.expected == self.assigned
+        fn is_valid<A: hal::Api>(&self, bind_group_layouts: &BindGroupLayouts<A>) -> bool {
+            if self.expected.is_none() || self.expected == self.assigned {
+                return true;
+            }
+
+            if let Some(id) = self.assigned {
+                return bind_group_layouts[id].compatible_layout == self.expected;
+            }
+
+            false
         }
     }
 
@@ -88,9 +101,12 @@ mod compat {
                 .filter_map(|(i, e)| if e.is_active() { Some(i) } else { None })
         }
 
-        pub fn invalid_mask(&self) -> super::BindGroupMask {
+        pub fn invalid_mask<A: hal::Api>(
+            &self,
+            bind_group_layouts: &BindGroupLayouts<A>,
+        ) -> super::BindGroupMask {
             self.entries.iter().enumerate().fold(0, |mask, (i, entry)| {
-                if entry.is_valid() {
+                if entry.is_valid(bind_group_layouts) {
                     mask
                 } else {
                     mask | 1u8 << i
@@ -276,8 +292,11 @@ impl Binder {
             .map(move |index| payloads[index].group_id.as_ref().unwrap().value)
     }
 
-    pub(super) fn invalid_mask(&self) -> BindGroupMask {
-        self.manager.invalid_mask()
+    pub(super) fn invalid_mask<A: hal::Api>(
+        &self,
+        bind_group_layouts: &BindGroupLayouts<A>,
+    ) -> BindGroupMask {
+        self.manager.invalid_mask(bind_group_layouts)
     }
 
     /// Scan active buffer bindings corresponding to layouts without `min_binding_size` specified.
diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs
index 0a0b4e85e6b..1d79201df1c 100644
--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@@ -1,6 +1,7 @@
 use crate::{
     binding_model::{
-        BindError, BindGroup, LateMinBufferBindingSizeMismatch, PushConstantUploadError,
+        BindError, BindGroup, BindGroupLayouts, LateMinBufferBindingSizeMismatch,
+        PushConstantUploadError,
     },
     command::{
         bind::Binder,
@@ -257,8 +258,8 @@ struct State<A: HalApi> {
 }
 
 impl<A: HalApi> State<A> {
-    fn is_ready(&self) -> Result<(), DispatchError> {
-        let bind_mask = self.binder.invalid_mask();
+    fn is_ready(&self, bind_group_layouts: &BindGroupLayouts<A>) -> Result<(), DispatchError> {
+        let bind_mask = self.binder.invalid_mask(bind_group_layouts);
         if bind_mask != 0 {
             //let (expected, provided) = self.binder.entries[index as usize].info();
             return Err(DispatchError::IncompatibleBindGroup {
@@ -371,6 +372,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token);
         let (pipeline_guard, mut token) = hub.compute_pipelines.read(&mut token);
         let (query_set_guard, mut token) = hub.query_sets.read(&mut token);
+        let (bind_group_layout_guard, mut token) = hub.bind_group_layouts.read(&mut token);
         let (buffer_guard, mut token) = hub.buffers.read(&mut token);
         let (texture_guard, _) = hub.textures.read(&mut token);
 
@@ -591,7 +593,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                         pipeline: state.pipeline,
                     };
 
-                    state.is_ready().map_pass_err(scope)?;
+                    state
+                        .is_ready(&*bind_group_layout_guard)
+                        .map_pass_err(scope)?;
                     state
                         .flush_states(
                             raw,
@@ -628,7 +632,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                         pipeline: state.pipeline,
                     };
 
-                    state.is_ready().map_pass_err(scope)?;
+                    state
+                        .is_ready(&*bind_group_layout_guard)
+                        .map_pass_err(scope)?;
 
                     device
                         .require_downlevel_flags(wgt::DownlevelFlags::INDIRECT_EXECUTION)
diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index e8080abba32..cf835317418 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1,5 +1,5 @@
 use crate::{
-    binding_model::BindError,
+    binding_model::{BindError, BindGroupLayouts},
     command::{
         self,
         bind::Binder,
@@ -386,7 +386,11 @@ struct State {
 }
 
 impl State {
-    fn is_ready(&self, indexed: bool) -> Result<(), DrawError> {
+    fn is_ready<A: hal::Api>(
+        &self,
+        indexed: bool,
+        bind_group_layouts: &BindGroupLayouts<A>,
+    ) -> Result<(), DrawError> {
         // Determine how many vertex buffers have already been bound
         let vertex_buffer_count = self.vertex.inputs.iter().take_while(|v| v.bound).count() as u32;
         // Compare with the needed quantity
@@ -396,7 +400,7 @@ impl State {
             });
         }
 
-        let bind_mask = self.binder.invalid_mask();
+        let bind_mask = self.binder.invalid_mask(bind_group_layouts);
         if bind_mask != 0 {
             //let (expected, provided) = self.binder.entries[index as usize].info();
             return Err(DrawError::IncompatibleBindGroup {
@@ -1252,6 +1256,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token);
             let (render_pipeline_guard, mut token) = hub.render_pipelines.read(&mut token);
             let (query_set_guard, mut token) = hub.query_sets.read(&mut token);
+            let (bind_group_layout_guard, mut token) = hub.bind_group_layouts.read(&mut token);
             let (buffer_guard, mut token) = hub.buffers.read(&mut token);
             let (texture_guard, mut token) = hub.textures.read(&mut token);
             let (view_guard, _) = hub.texture_views.read(&mut token);
@@ -1713,7 +1718,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             indirect: false,
                             pipeline: state.pipeline,
                         };
-                        state.is_ready(indexed).map_pass_err(scope)?;
+                        state
+                            .is_ready::<A>(indexed, &bind_group_layout_guard)
+                            .map_pass_err(scope)?;
 
                         let last_vertex = first_vertex + vertex_count;
                         let vertex_limit = state.vertex.vertex_limit;
@@ -1753,7 +1760,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             indirect: false,
                             pipeline: state.pipeline,
                         };
-                        state.is_ready(indexed).map_pass_err(scope)?;
+                        state
+                            .is_ready::<A>(indexed, &*bind_group_layout_guard)
+                            .map_pass_err(scope)?;
 
                         //TODO: validate that base_vertex + max_index() is
                         // within the provided range
@@ -1798,7 +1807,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             indirect: true,
                             pipeline: state.pipeline,
                         };
-                        state.is_ready(indexed).map_pass_err(scope)?;
+                        state
+                            .is_ready::<A>(indexed, &*bind_group_layout_guard)
+                            .map_pass_err(scope)?;
 
                         let stride = match indexed {
                             false => mem::size_of::<wgt::DrawIndirectArgs>(),
@@ -1870,7 +1881,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                             indirect: true,
                             pipeline: state.pipeline,
                         };
-                        state.is_ready(indexed).map_pass_err(scope)?;
+                        state
+                            .is_ready::<A>(indexed, &*bind_group_layout_guard)
+                            .map_pass_err(scope)?;
 
                         let stride = match indexed {
                             false => mem::size_of::<wgt::DrawIndirectArgs>(),
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index 77a4e4275c7..fb9ab2956de 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -1,7 +1,8 @@
 #[cfg(feature = "trace")]
 use crate::device::trace;
 use crate::{
-    binding_model, command, conv,
+    binding_model,
+    command, conv,
     device::{life::WaitIdleError, map_buffer, queue, Device, DeviceError, HostMap},
     global::Global,
     hal_api::HalApi,
@@ -1069,19 +1070,30 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 }
             }
 
-            // If there is an equivalent BGL, just bump the refcount and return it.
-            // This is only applicable for identity filters that are generating new IDs,
-            // so their inputs are `PhantomData` of size 0.
-            if mem::size_of::<Input<G, id::BindGroupLayoutId>>() == 0 {
+            let mut compatible_layout = None;
+            {
                 let (bgl_guard, _) = hub.bind_group_layouts.read(&mut token);
                 if let Some(id) =
                     Device::deduplicate_bind_group_layout(device_id, &entry_map, &*bgl_guard)
                 {
-                    return (id, None);
+                    // If there is an equivalent BGL, just bump the refcount and return it.
+                    // This is only applicable to identity filters that generate their IDs,
+                    // which use PhantomData as their identity.
+                    // In practice this means:
+                    //  - wgpu users take this branch and return the existing
+                    //    id without using the indirection layer in BindGrouoLayout.
+                    //  - Other users like gecko or the replay tool use don't take
+                    //    the branch and instead rely on the indirection to use the
+                    //    proper bind group layout id.
+                    if std::mem::size_of::<Input<G, id::BindGroupLayoutId>>() == 0 {
+                        return (id, None);
+                    }
+
+                    compatible_layout = Some(id::Valid(id));
                 }
             }
 
-            let layout = match device.create_bind_group_layout(
+            let mut layout = match device.create_bind_group_layout(
                 device_id,
                 desc.label.borrow_option(),
                 entry_map,
@@ -1090,7 +1102,10 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                 Err(e) => break e,
             };
 
+            layout.compatible_layout = compatible_layout;
+
             let id = fid.assign(layout, &mut token);
+
             return (id.0, None);
         };
 
@@ -1235,16 +1250,28 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                     .add(trace::Action::CreateBindGroup(fid.id(), desc.clone()));
             }
 
-            let bind_group_layout = match bind_group_layout_guard.get(desc.layout) {
+            let mut bind_group_layout = match bind_group_layout_guard.get(desc.layout) {
                 Ok(layout) => layout,
-                Err(_) => break binding_model::CreateBindGroupError::InvalidLayout,
+                Err(..) => break binding_model::CreateBindGroupError::InvalidLayout,
+            };
+
+            let mut layout_id = id::Valid(desc.layout);
+            if let Some(id) = bind_group_layout.compatible_layout {
+                layout_id = id;
+                bind_group_layout = &bind_group_layout_guard[id];
+            }
+
+            let bind_group = match device.create_bind_group(
+                device_id,
+                bind_group_layout,
+                layout_id,
+                desc,
+                hub,
+                &mut token,
+            ) {
+                Ok(bind_group) => bind_group,
+                Err(e) => break e,
             };
-            let bind_group =
-                match device.create_bind_group(device_id, bind_group_layout, desc, hub, &mut token)
-                {
-                    Ok(bind_group) => bind_group,
-                    Err(e) => break e,
-                };
             let ref_count = bind_group.life_guard.add_ref();
 
             let id = fid.assign(bind_group, &mut token);
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 75491d10cb9..43966247eda 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -762,14 +762,24 @@ impl<A: HalApi> LifetimeTracker<A> {
                 //Note: nothing else can bump the refcount since the guard is locked exclusively
                 //Note: same BGL can appear multiple times in the list, but only the last
                 // encounter could drop the refcount to 0.
-                if guard[id].multi_ref_count.dec_and_check_empty() {
-                    log::debug!("Bind group layout {:?} will be destroyed", id);
-                    #[cfg(feature = "trace")]
-                    if let Some(t) = trace {
-                        t.lock().add(trace::Action::DestroyBindGroupLayout(id.0));
-                    }
-                    if let Some(lay) = hub.bind_group_layouts.unregister_locked(id.0, &mut *guard) {
-                        self.free_resources.bind_group_layouts.push(lay.raw);
+                let mut bgl_to_check = Some(id);
+                while let Some(id) = bgl_to_check.take() {
+                    let bgl = &guard[id];
+                    if bgl.multi_ref_count.dec_and_check_empty() {
+                        // If This layout points to a compatible one, go over the latter
+                        // to decrement the ref count and potentially destroy it.
+                        bgl_to_check = bgl.compatible_layout;
+
+                        log::debug!("Bind group layout {:?} will be destroyed", id);
+                        #[cfg(feature = "trace")]
+                        if let Some(t) = trace {
+                            t.lock().add(trace::Action::DestroyBindGroupLayout(id.0));
+                        }
+                        if let Some(lay) =
+                            hub.bind_group_layouts.unregister_locked(id.0, &mut *guard)
+                        {
+                            self.free_resources.bind_group_layouts.push(lay.raw);
+                        }
                     }
                 }
             }
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 20e057a934a..3fdaa39d56d 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -1,7 +1,8 @@
 #[cfg(feature = "trace")]
 use crate::device::trace;
 use crate::{
-    binding_model, command, conv,
+    binding_model::{self, get_bind_group_layout, try_get_bind_group_layout},
+    command, conv,
     device::life::WaitIdleError,
     device::{
         AttachmentData, CommandAllocator, MissingDownlevelFlags, MissingFeatures,
@@ -1369,6 +1370,11 @@ impl<A: HalApi> Device<A> {
             .iter(self_id.backend())
             .find(|&(_, bgl)| bgl.device_id.value.0 == self_id && bgl.entries == *entry_map)
             .map(|(id, value)| {
+                if let Some(comaptible) = value.compatible_layout {
+                    guard[comaptible].multi_ref_count.inc();
+                    return comaptible.0;
+                }
+
                 value.multi_ref_count.inc();
                 id
             })
@@ -1626,6 +1632,7 @@ impl<A: HalApi> Device<A> {
             entries: entry_map,
             #[cfg(debug_assertions)]
             label: label.unwrap_or("").to_string(),
+            compatible_layout: None,
         })
     }
 
@@ -1799,6 +1806,7 @@ impl<A: HalApi> Device<A> {
         &self,
         self_id: id::DeviceId,
         layout: &binding_model::BindGroupLayout<A>,
+        layout_id: id::Valid<id::BindGroupLayoutId>,
         desc: &binding_model::BindGroupDescriptor,
         hub: &Hub<A, G>,
         token: &mut Token<binding_model::BindGroupLayout<A>>,
@@ -2038,7 +2046,7 @@ impl<A: HalApi> Device<A> {
                 value: id::Valid(self_id),
                 ref_count: self.life_guard.add_ref(),
             },
-            layout_id: id::Valid(desc.layout),
+            layout_id,
             life_guard: LifeGuard::new(desc.label.borrow_or_default()),
             used,
             used_buffer_ranges,
@@ -2287,7 +2295,7 @@ impl<A: HalApi> Device<A> {
         let bgl_vec = desc
             .bind_group_layouts
             .iter()
-            .map(|&id| &bgl_guard.get(id).unwrap().raw)
+            .map(|&id| &try_get_bind_group_layout(bgl_guard, id).unwrap().raw)
             .collect::<Vec<_>>();
         let hal_desc = hal::PipelineLayoutDescriptor {
             label: desc.label.borrow_option(),
@@ -2314,8 +2322,9 @@ impl<A: HalApi> Device<A> {
                 .iter()
                 .map(|&id| {
                     // manually add a dependency to BGL
-                    bgl_guard.get(id).unwrap().multi_ref_count.inc();
-                    id::Valid(id)
+                    let (id, layout) = get_bind_group_layout(bgl_guard, id::Valid(id));
+                    layout.multi_ref_count.inc();
+                    id
                 })
                 .collect(),
             push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(),
diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs
index d3da0be6c21..c0670e085ce 100644
--- a/wgpu-core/src/hub.rs
+++ b/wgpu-core/src/hub.rs
@@ -243,6 +243,7 @@ impl<A: HalApi> Access<PipelineLayout<A>> for RenderBundle<A> {}
 impl<A: HalApi> Access<BindGroupLayout<A>> for Root {}
 impl<A: HalApi> Access<BindGroupLayout<A>> for Device<A> {}
 impl<A: HalApi> Access<BindGroupLayout<A>> for PipelineLayout<A> {}
+impl<A: HalApi> Access<BindGroupLayout<A>> for QuerySet<A> {}
 impl<A: HalApi> Access<BindGroup<A>> for Root {}
 impl<A: HalApi> Access<BindGroup<A>> for Device<A> {}
 impl<A: HalApi> Access<BindGroup<A>> for BindGroupLayout<A> {}