From 701378ffbc69a72236763dcfd168e9237deb104c Mon Sep 17 00:00:00 2001 From: Lain-dono Date: Tue, 30 Aug 2022 03:00:39 +0000 Subject: [PATCH] Use 3 bits of PipelineKey to store MSAA sample count (#5826) The MSAA sample count is always a power of two, so it is enough to store `log2(sample_count)`. This can be computed with [u32::trailing_zeros](https://doc.rust-lang.org/stable/std/primitive.u32.html#method.trailing_zeros). The sample count can then be restored with `1 << stored`. This uses 3 bits instead of 6 and supports up to 128x MSAA, which is more than any common hardware supports. Full table of possible variations (rows prefixed with `*` are inputs whose loaded value differs from the original): ``` original MSAA sample count stored loaded * 00000000000000000000000000000000 -> 000 -> 00000001 1 00000000000000000000000000000001 -> 000 -> 00000001 1 00000000000000000000000000000010 -> 001 -> 00000010 2 00000000000000000000000000000100 -> 010 -> 00000100 4 00000000000000000000000000001000 -> 011 -> 00001000 8 00000000000000000000000000010000 -> 100 -> 00010000 16 00000000000000000000000000100000 -> 101 -> 00100000 32 00000000000000000000000001000000 -> 110 -> 01000000 64 00000000000000000000000010000000 -> 111 -> 10000000 128 * 00000000000000000000000100000000 -> 000 -> 00000001 256 * 00000000000000000000001000000000 -> 001 -> 00000010 512 * 00000000000000000000010000000000 -> 010 -> 00000100 1024 * 00000000000000000000100000000000 -> 011 -> 00001000 2048 * 00000000000000000001000000000000 -> 100 -> 00010000 4096 * 00000000000000000010000000000000 -> 101 -> 00100000 8192 * 00000000000000000100000000000000 -> 110 -> 01000000 16384 * 00000000000000001000000000000000 -> 111 -> 10000000 32768 * 00000000000000010000000000000000 -> 000 -> 00000001 65536 * 00000000000000100000000000000000 -> 001 -> 00000010 131072 * 00000000000001000000000000000000 -> 010 -> 00000100 262144 * 00000000000010000000000000000000 -> 011 -> 00001000 524288 * 00000000000100000000000000000000 -> 100 -> 00010000 1048576 * 00000000001000000000000000000000 -> 101 -> 00100000 2097152 * 
00000000010000000000000000000000 -> 110 -> 01000000 4194304 * 00000000100000000000000000000000 -> 111 -> 10000000 8388608 * 00000001000000000000000000000000 -> 000 -> 00000001 16777216 * 00000010000000000000000000000000 -> 001 -> 00000010 33554432 * 00000100000000000000000000000000 -> 010 -> 00000100 67108864 * 00001000000000000000000000000000 -> 011 -> 00001000 134217728 * 00010000000000000000000000000000 -> 100 -> 00010000 268435456 * 00100000000000000000000000000000 -> 101 -> 00100000 536870912 * 01000000000000000000000000000000 -> 110 -> 01000000 1073741824 * 10000000000000000000000000000000 -> 111 -> 10000000 2147483648 ``` --- crates/bevy_pbr/src/render/mesh.rs | 21 +++++++++++---------- crates/bevy_sprite/src/mesh2d/mesh.rs | 19 ++++++++++--------- crates/bevy_sprite/src/render/mod.rs | 15 ++++++++------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 3d70cfd110eb52..f5e9a9ae8519ba 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -500,35 +500,36 @@ impl MeshPipeline { bitflags::bitflags! { #[repr(transparent)] // NOTE: Apparently quadro drivers support up to 64x MSAA. - /// MSAA uses the highest 6 bits for the MSAA sample count - 1 to support up to 64x MSAA. + /// MSAA uses the highest 3 bits for the MSAA log2(sample count) to support up to 128x MSAA. 
pub struct MeshPipelineKey: u32 { const NONE = 0; const TRANSPARENT_MAIN_PASS = (1 << 0); - const MSAA_RESERVED_BITS = MeshPipelineKey::MSAA_MASK_BITS << MeshPipelineKey::MSAA_SHIFT_BITS; - const PRIMITIVE_TOPOLOGY_RESERVED_BITS = MeshPipelineKey::PRIMITIVE_TOPOLOGY_MASK_BITS << MeshPipelineKey::PRIMITIVE_TOPOLOGY_SHIFT_BITS; + const MSAA_RESERVED_BITS = Self::MSAA_MASK_BITS << Self::MSAA_SHIFT_BITS; + const PRIMITIVE_TOPOLOGY_RESERVED_BITS = Self::PRIMITIVE_TOPOLOGY_MASK_BITS << Self::PRIMITIVE_TOPOLOGY_SHIFT_BITS; } } impl MeshPipelineKey { - const MSAA_MASK_BITS: u32 = 0b111111; - const MSAA_SHIFT_BITS: u32 = 32 - 6; + const MSAA_MASK_BITS: u32 = 0b111; + const MSAA_SHIFT_BITS: u32 = 32 - Self::MSAA_MASK_BITS.count_ones(); const PRIMITIVE_TOPOLOGY_MASK_BITS: u32 = 0b111; const PRIMITIVE_TOPOLOGY_SHIFT_BITS: u32 = Self::MSAA_SHIFT_BITS - 3; pub fn from_msaa_samples(msaa_samples: u32) -> Self { - let msaa_bits = ((msaa_samples - 1) & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS; - MeshPipelineKey::from_bits(msaa_bits).unwrap() + let msaa_bits = + (msaa_samples.trailing_zeros() & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS; + Self::from_bits(msaa_bits).unwrap() } pub fn msaa_samples(&self) -> u32 { - ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) + 1 + 1 << ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) } pub fn from_primitive_topology(primitive_topology: PrimitiveTopology) -> Self { let primitive_topology_bits = ((primitive_topology as u32) & Self::PRIMITIVE_TOPOLOGY_MASK_BITS) << Self::PRIMITIVE_TOPOLOGY_SHIFT_BITS; - MeshPipelineKey::from_bits(primitive_topology_bits).unwrap() + Self::from_bits(primitive_topology_bits).unwrap() } pub fn primitive_topology(&self) -> PrimitiveTopology { @@ -923,7 +924,7 @@ mod tests { use super::MeshPipelineKey; #[test] fn mesh_key_msaa_samples() { - for i in 1..=64 { + for i in [1, 2, 4, 8, 16, 32, 64, 128] { assert_eq!(MeshPipelineKey::from_msaa_samples(i).msaa_samples(), i); } } diff --git 
a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index 18513f0d5362d1..54927a00770314 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -269,35 +269,36 @@ impl Mesh2dPipeline { bitflags::bitflags! { #[repr(transparent)] // NOTE: Apparently quadro drivers support up to 64x MSAA. - // MSAA uses the highest 6 bits for the MSAA sample count - 1 to support up to 64x MSAA. + // MSAA uses the highest 3 bits for the MSAA log2(sample count) to support up to 128x MSAA. // FIXME: make normals optional? pub struct Mesh2dPipelineKey: u32 { const NONE = 0; - const MSAA_RESERVED_BITS = Mesh2dPipelineKey::MSAA_MASK_BITS << Mesh2dPipelineKey::MSAA_SHIFT_BITS; - const PRIMITIVE_TOPOLOGY_RESERVED_BITS = Mesh2dPipelineKey::PRIMITIVE_TOPOLOGY_MASK_BITS << Mesh2dPipelineKey::PRIMITIVE_TOPOLOGY_SHIFT_BITS; + const MSAA_RESERVED_BITS = Self::MSAA_MASK_BITS << Self::MSAA_SHIFT_BITS; + const PRIMITIVE_TOPOLOGY_RESERVED_BITS = Self::PRIMITIVE_TOPOLOGY_MASK_BITS << Self::PRIMITIVE_TOPOLOGY_SHIFT_BITS; } } impl Mesh2dPipelineKey { - const MSAA_MASK_BITS: u32 = 0b111111; - const MSAA_SHIFT_BITS: u32 = 32 - 6; + const MSAA_MASK_BITS: u32 = 0b111; + const MSAA_SHIFT_BITS: u32 = 32 - Self::MSAA_MASK_BITS.count_ones(); const PRIMITIVE_TOPOLOGY_MASK_BITS: u32 = 0b111; const PRIMITIVE_TOPOLOGY_SHIFT_BITS: u32 = Self::MSAA_SHIFT_BITS - 3; pub fn from_msaa_samples(msaa_samples: u32) -> Self { - let msaa_bits = ((msaa_samples - 1) & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS; - Mesh2dPipelineKey::from_bits(msaa_bits).unwrap() + let msaa_bits = + (msaa_samples.trailing_zeros() & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS; + Self::from_bits(msaa_bits).unwrap() } pub fn msaa_samples(&self) -> u32 { - ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) + 1 + 1 << ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) } pub fn from_primitive_topology(primitive_topology: PrimitiveTopology) -> Self { let 
primitive_topology_bits = ((primitive_topology as u32) & Self::PRIMITIVE_TOPOLOGY_MASK_BITS) << Self::PRIMITIVE_TOPOLOGY_SHIFT_BITS; - Mesh2dPipelineKey::from_bits(primitive_topology_bits).unwrap() + Self::from_bits(primitive_topology_bits).unwrap() } pub fn primitive_topology(&self) -> PrimitiveTopology { diff --git a/crates/bevy_sprite/src/render/mod.rs b/crates/bevy_sprite/src/render/mod.rs index c2f98b2aab2868..db1dc1a86d7539 100644 --- a/crates/bevy_sprite/src/render/mod.rs +++ b/crates/bevy_sprite/src/render/mod.rs @@ -90,25 +90,26 @@ impl FromWorld for SpritePipeline { bitflags::bitflags! { #[repr(transparent)] // NOTE: Apparently quadro drivers support up to 64x MSAA. - // MSAA uses the highest 6 bits for the MSAA sample count - 1 to support up to 64x MSAA. + // MSAA uses the highest 3 bits for the MSAA log2(sample count) to support up to 128x MSAA. pub struct SpritePipelineKey: u32 { const NONE = 0; const COLORED = (1 << 0); - const MSAA_RESERVED_BITS = SpritePipelineKey::MSAA_MASK_BITS << SpritePipelineKey::MSAA_SHIFT_BITS; + const MSAA_RESERVED_BITS = Self::MSAA_MASK_BITS << Self::MSAA_SHIFT_BITS; } } impl SpritePipelineKey { - const MSAA_MASK_BITS: u32 = 0b111111; - const MSAA_SHIFT_BITS: u32 = 32 - 6; + const MSAA_MASK_BITS: u32 = 0b111; + const MSAA_SHIFT_BITS: u32 = 32 - Self::MSAA_MASK_BITS.count_ones(); pub fn from_msaa_samples(msaa_samples: u32) -> Self { - let msaa_bits = ((msaa_samples - 1) & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS; - SpritePipelineKey::from_bits(msaa_bits).unwrap() + let msaa_bits = + (msaa_samples.trailing_zeros() & Self::MSAA_MASK_BITS) << Self::MSAA_SHIFT_BITS; + Self::from_bits(msaa_bits).unwrap() } pub fn msaa_samples(&self) -> u32 { - ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) + 1 + 1 << ((self.bits >> Self::MSAA_SHIFT_BITS) & Self::MSAA_MASK_BITS) } }