From bfd89f0c1b13d69affa9020bfacdb69d4111d0c8 Mon Sep 17 00:00:00 2001 From: James Liu Date: Thu, 23 Jun 2022 10:52:49 +0000 Subject: [PATCH] Allow unbatched render phases to use unstable sorts (#5049) # Objective Partially addresses #4291. Speed up the sort phase for unbatched render phases. ## Solution Split out one of the optimizations in #4899 and allow implementors of `PhaseItem` to change what kind of sort is used when sorting the items in the phase. This currently includes Stable, Unstable, and Unsorted. Each of these corresponds to `Vec::sort_by_key`, `Vec::sort_unstable_by_key`, and no sorting at all. The default is `Unstable`. The last one can be used as a default if users introduce a preliminary depth prepass. ## Performance This will not impact the performance of any batched phases, as it is still using a stable sort. 2D's only phase is unchanged. All 3D phases are unbatched currently, and will benefit from this change. On `many_cubes`, where the primary phase is opaque, this change sees a speed up from 907.02us -> 477.62us, a 47.35% reduction. ![image](https://user-images.githubusercontent.com/3137680/174471253-22424874-30d5-4db5-b5b4-65fb2c612a9c.png) ## Future Work There were prior discussions to add support for faster radix sorts in #4291, which in theory should be a `O(n)` instead of a `O(nlog(n))` time. [`voracious`](https://crates.io/crates/voracious_radix_sort) has been proposed, but it seems to be optimize for use cases with more than 30,000 items, which may be atypical for most systems. Another optimization included in #4899 is to reduce the size of a few of the IDs commonly used in `PhaseItem` implementations to shrink the types to make swapping/sorting faster. Both `CachedPipelineId` and `DrawFunctionId` could be reduced to `u32` instead of `usize`. Ideally, this should automatically change to use stable sorts when `BatchedPhaseItem` is implemented on the same phase item type, but this requires specialization, which may not land in stable Rust for a short while. --- ## Changelog Added: `PhaseItem::sort` ## Migration Guide RenderPhases now default to a unstable sort (via `slice::sort_unstable_by_key`). This can typically improve sort phase performance, but may produce incorrect batching results when implementing `BatchedPhaseItem`. To revert to the older stable sort, manually implement `PhaseItem::sort` to implement a stable sort (i.e. via `slice::sort_by_key`). Co-authored-by: Federico Rinaldi Co-authored-by: Robert Swain Co-authored-by: colepoirier --- crates/bevy_core_pipeline/Cargo.toml | 2 +- crates/bevy_core_pipeline/src/core_2d/mod.rs | 5 +++++ crates/bevy_core_pipeline/src/core_3d/mod.rs | 15 +++++++++++++ crates/bevy_pbr/Cargo.toml | 1 + crates/bevy_pbr/src/render/light.rs | 5 +++++ crates/bevy_render/src/render_phase/draw.rs | 22 +++++++++++++++++++- crates/bevy_render/src/render_phase/mod.rs | 2 +- 7 files changed, 49 insertions(+), 3 deletions(-) diff --git a/crates/bevy_core_pipeline/Cargo.toml b/crates/bevy_core_pipeline/Cargo.toml index 89602f5048756..26c8ffee15116 100644 --- a/crates/bevy_core_pipeline/Cargo.toml +++ b/crates/bevy_core_pipeline/Cargo.toml @@ -28,4 +28,4 @@ bevy_transform = { path = "../bevy_transform", version = "0.8.0-dev" } bevy_utils = { path = "../bevy_utils", version = "0.8.0-dev" } serde = { version = "1", features = ["derive"] } - +radsort = "0.1" diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs b/crates/bevy_core_pipeline/src/core_2d/mod.rs index 8acdb42da54ef..4e1e8098e93c9 100644 --- a/crates/bevy_core_pipeline/src/core_2d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs @@ -90,6 +90,11 @@ impl PhaseItem for Transparent2d { fn draw_function(&self) -> DrawFunctionId { self.draw_function } + + #[inline] + fn sort(items: &mut [Self]) { + items.sort_by_key(|item| item.sort_key()); + } } impl EntityPhaseItem for Transparent2d { diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs index 6342d8471291b..e3b230a1e7e20 100644 --- a/crates/bevy_core_pipeline/src/core_3d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs @@ -98,6 +98,11 @@ impl PhaseItem for Opaque3d { fn draw_function(&self) -> DrawFunctionId { self.draw_function } + + #[inline] + fn sort(items: &mut [Self]) { + radsort::sort_by_key(items, |item| item.distance); + } } impl EntityPhaseItem for Opaque3d { @@ -133,6 +138,11 @@ impl PhaseItem for AlphaMask3d { fn draw_function(&self) -> DrawFunctionId { self.draw_function } + + #[inline] + fn sort(items: &mut [Self]) { + radsort::sort_by_key(items, |item| item.distance); + } } impl EntityPhaseItem for AlphaMask3d { @@ -168,6 +178,11 @@ impl PhaseItem for Transparent3d { fn draw_function(&self) -> DrawFunctionId { self.draw_function } + + #[inline] + fn sort(items: &mut [Self]) { + radsort::sort_by_key(items, |item| item.distance); + } } impl EntityPhaseItem for Transparent3d { diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml index 37107c44d433a..46bf0f4e8c4f0 100644 --- a/crates/bevy_pbr/Cargo.toml +++ b/crates/bevy_pbr/Cargo.toml @@ -28,3 +28,4 @@ bevy_window = { path = "../bevy_window", version = "0.8.0-dev" } bitflags = "1.2" # direct dependency required for derive macro bytemuck = { version = "1", features = ["derive"] } +radsort = "0.1" diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index 63cf180e0e810..bf50d1a8e5fc3 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -1421,6 +1421,11 @@ impl PhaseItem for Shadow { fn draw_function(&self) -> DrawFunctionId { self.draw_function } + + #[inline] + fn sort(items: &mut [Self]) { + radsort::sort_by_key(items, |item| item.distance); + } } impl EntityPhaseItem for Shadow { diff --git a/crates/bevy_render/src/render_phase/draw.rs b/crates/bevy_render/src/render_phase/draw.rs index 5ffca0ea25886..bb44126eb60d6 100644 --- a/crates/bevy_render/src/render_phase/draw.rs +++ b/crates/bevy_render/src/render_phase/draw.rs @@ -35,13 +35,30 @@ pub trait Draw: Send + Sync + 'static { /// Afterwards it will be sorted and rendered automatically in the /// [`RenderStage::PhaseSort`](crate::RenderStage::PhaseSort) stage and /// [`RenderStage::Render`](crate::RenderStage::Render) stage, respectively. -pub trait PhaseItem: Send + Sync + 'static { +pub trait PhaseItem: Sized + Send + Sync + 'static { /// The type used for ordering the items. The smallest values are drawn first. type SortKey: Ord; /// Determines the order in which the items are drawn during the corresponding [`RenderPhase`](super::RenderPhase). fn sort_key(&self) -> Self::SortKey; /// Specifies the [`Draw`] function used to render the item. fn draw_function(&self) -> DrawFunctionId; + + /// Sorts a slice of phase items into render order. Generally if the same type + /// implements [`BatchedPhaseItem`], this should use a stable sort like [`slice::sort_by_key`]. + /// In almost all other cases, this should not be altered from the default, + /// which uses a unstable sort, as this provides the best balance of CPU and GPU + /// performance. + /// + /// Implementers can optionally not sort the list at all. This is generally advisable if and + /// only if the renderer supports a depth prepass, which is by default not supported by + /// the rest of Bevy's first party rendering crates. Even then, this may have a negative + /// impact on GPU-side performance due to overdraw. + /// + /// It's advised to always profile for performance changes when changing this implementation. + #[inline] + fn sort(items: &mut [Self]) { + items.sort_unstable_by_key(|item| item.sort_key()); + } } // TODO: make this generic? @@ -170,6 +187,9 @@ pub trait CachedRenderPipelinePhaseItem: PhaseItem { /// /// Batching is an optimization that regroups multiple items in the same vertex buffer /// to render them in a single draw call. +/// +/// If this is implemented on a type, the implementation of [`PhaseItem::sort`] should +/// be changed to implement a stable sort, or incorrect/suboptimal batching may result. pub trait BatchedPhaseItem: EntityPhaseItem { /// Range in the vertex buffer of this item fn batch_range(&self) -> &Option>; diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs index 76de6bf66501f..464cbdcbe92a6 100644 --- a/crates/bevy_render/src/render_phase/mod.rs +++ b/crates/bevy_render/src/render_phase/mod.rs @@ -29,7 +29,7 @@ impl RenderPhase { /// Sorts all of its [`PhaseItems`](PhaseItem). pub fn sort(&mut self) { - self.items.sort_by_key(|d| d.sort_key()); + I::sort(&mut self.items); } }