diff --git a/.cargo/config.toml b/.cargo/config.toml index b75130124b5..8434ec2cc6a 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -5,3 +5,6 @@ xtask = "run --manifest-path xtask/Cargo.toml" rustflags = [ "--cfg=web_sys_unstable_apis" ] +rustdocflags = [ +"--cfg=web_sys_unstable_apis" +] diff --git a/.deny.toml b/.deny.toml index 9c8f96c6b40..5f1dd13487a 100644 --- a/.deny.toml +++ b/.deny.toml @@ -6,8 +6,10 @@ skip-tree = [ { name = "rustc_version", version = "0.2.3" }, ] skip = [ + { name = "hlsl-snapshots", version = "0.1.0" }, ] wildcards = "deny" +allow-wildcard-paths = true [licenses] allow = [ @@ -26,7 +28,6 @@ allow = [ [sources] allow-git = [ # Waiting on releases; used in examples only - "https://github.com/SiegeEngine/ddsfile", "https://github.com/Razaekel/noise-rs", "https://github.com/grovesNL/glow", diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 48a67b7926b..6bb8d382453 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,7 +56,7 @@ env: RUST_BACKTRACE: full PKG_CONFIG_ALLOW_CROSS: 1 # allow android to work RUSTFLAGS: --cfg=web_sys_unstable_apis -D warnings - RUSTDOCFLAGS: -Dwarnings + RUSTDOCFLAGS: --cfg=web_sys_unstable_apis -D warnings WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes CACHE_SUFFIX: c # cache busting diff --git a/CHANGELOG.md b/CHANGELOG.md index b6c4e43eb7f..6b24cd2b782 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,58 +39,84 @@ Bottom level categories: ## Unreleased -### All Public Dependencies Are Re-Exported +### Documentation -All of wgpu's public dependencies are now re-exported at the top level so that users don't need to take their own dependencies. -This includes: -- wgpu-core -- wgpu-hal -- naga -- raw_window_handle -- web_sys +- Document Wayland specific behavior related to `SurfaceTexture::present`. By @i509VCB in [#5092](https://github.com/gfx-rs/wgpu/pull/5092). 
-### `naga-ir` Shaders Have Dedicated Feature +### New features -The `naga-ir` feature has been added to allow you to add naga module shaders without guessing about what other features needed to be enabled to get access to it. +#### General + +- Many numeric built-ins have had a constant evaluation implementation added for them, which allows them to be used in a `const` context: + - [#4879](https://github.com/gfx-rs/wgpu/pull/4879) by @ErichDonGubler: + - `abs` + - `acos` + - `acosh` + - `asin` + - `asinh` + - `atan` + - `atanh` + - `cos` + - `cosh` + - `round` + - `saturate` + - `sin` + - `sinh` + - `sqrt` + - `step` + - `tan` + - `tanh` +- Eager release of GPU resources comes from device.trackers. By @bradwerth in [#5075](https://github.com/gfx-rs/wgpu/pull/5075) -### Direct3D 11 backend removal +### Bug Fixes -This backend had no functionality, and with the recent support for GL on Desktop, which allows wgpu to run on older devices, there is no need to keep the backend. +#### WGL -### `WGPU_ALLOW_UNDERLYING_NONCOMPLIANT_ADAPTER` environment variable +- In Surface::configure and Surface::present, fix the current GL context not being unset when releasing the lock that guards access to making the context current. This was causing other threads to panic when trying to make the context current. By @Imberflur in [#5087](https://github.com/gfx-rs/wgpu/pull/5087). -This adds a way to allow a Vulkan driver which is non-compliant per VK_KHR_driver_properties to be enumerated. This is intended for testing new Vulkan drivers which are not Vulkan compliant yet. +## v0.19.0 (2024-01-17) -### `DeviceExt::create_texture_with_data` Allows Mip-Major Data +This release includes: +- `wgpu` +- `wgpu-core` +- `wgpu-hal` +- `wgpu-types` +- `wgpu-info` +- `naga` (skipped from 0.14 to 0.19) +- `naga-cli` (skipped from 0.14 to 0.19) +- `d3d12` (skipped from 0.7 to 0.19) -Previously, `DeviceExt::create_texture_with_data` only allowed data to be provided in layer major order. 
There is now a `order` parameter which allows you to specify if the data is in layer major or mip major order. +### Improved Multithreading through internal use of Reference Counting -### `expose-ids` feature now available unconditionally +Large refactoring of wgpu’s internals aiming at reducing lock contention, and providing better performance when using wgpu on multiple threads. -This feature allowed you to call `global_id` on any wgpu opaque handle to get a unique hashable identity for the given resource. This is now available without the feature flag. By @cwfitzgerald in [#4841](https://github.com/gfx-rs/wgpu/pull/4841) +[Check the blog post!](https://gfx-rs.github.io/2023/11/24/arcanization.html) -### `dx12` and `metal` backend crate features +By @gents83 in [#3626](https://github.com/gfx-rs/wgpu/pull/3626) and thanks also to @jimblandy, @nical, @Wumpf, @Elabajaba & @cwfitzgerald -Wgpu now exposes backend feature for the Direct3D 12 (`dx12`) and Metal (`metal`) backend. These are enabled by default, but don't do anything when not targetting the corresponding OS. By @daxpedda in [#4815](https://github.com/gfx-rs/wgpu/pull/4815) +### All Public Dependencies are Re-Exported -### Unified surface creation +All of wgpu's public dependencies are now re-exported at the top level so that users don't need to take their own dependencies. +This includes: +- wgpu-core +- wgpu-hal +- naga +- raw_window_handle +- web_sys -Previously, there were various specialized surface creation functions for various platform specific handles. -Now, `wgpu::Instance::create_surface` & `wgpu::Instance::create_surface_unsafe` instead each take a value that can be converted to the unified `wgpu::SurfaceTarget`/`wgpu::SurfaceTargetUnsafe` enums. -Conversion to `wgpu::SurfaceTarget` is automatic for anything implementing `raw-window-handle`'s `HasWindowHandle` & `HasDisplayHandle` traits, -meaning that you can continue to e.g. pass references to winit windows as before. 
-By @wumpf in [#4984](https://github.com/gfx-rs/wgpu/pull/4984) +### Feature Flag Changes -### WebGPU & WebGL in the same binary +#### WebGPU & WebGL in the same Binary Enabling `webgl` no longer removes the `webgpu` backend. + Instead, there's a new (default enabled) `webgpu` feature that allows to explicitly opt-out of `webgpu` if so desired. If both `webgl` & `webgpu` are enabled, `wgpu::Instance` decides upon creation whether to target wgpu-core/WebGL or WebGPU. This means that adapter selection is not handled as with regular adapters, but still allows to decide at runtime whether `webgpu` or the `webgl` backend should be used using a single wasm binary. By @wumpf in [#5044](https://github.com/gfx-rs/wgpu/pull/5044) -### New Features +#### `naga-ir` Dedicated Feature #### General - Added `DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW` to know if `@builtin(vertex_index)` and `@builtin(instance_index)` will respect the `first_vertex` / `first_instance` in indirect calls. If this is not present, both will always start counting from 0. Currently enabled on all backends except DX12. By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722) @@ -99,126 +125,188 @@ By @wumpf in [#5044](https://github.com/gfx-rs/wgpu/pull/5044) - GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851) - wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886) - DeviceLostClosure is guaranteed to be invoked exactly once. By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862) -- Eager release of GPU resources comes from device.trackers. By @bradwerth in [#5075](https://github.com/gfx-rs/wgpu/pull/5075) +The `naga-ir` feature has been added to allow you to add naga module shaders without guessing about what other features needed to be enabled to get access to it. 
+By @cwfitzgerald in [#5063](https://github.com/gfx-rs/wgpu/pull/5063). -#### OpenGL -- `@builtin(instance_index)` now properly reflects the range provided in the draw call instead of always counting from 0. By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). -- Desktop GL now supports `POLYGON_MODE_LINE` and `POLYGON_MODE_POINT`. By @valaphee in [#4836](https://github.com/gfx-rs/wgpu/pull/4836) +#### `expose-ids` Feature available unconditionally -#### Naga +This feature allowed you to call `global_id` on any wgpu opaque handle to get a unique hashable identity for the given resource. This is now available without the feature flag. +By @cwfitzgerald in [#4841](https://github.com/gfx-rs/wgpu/pull/4841). -- Naga's WGSL front end now allows operators to produce values with abstract types, rather than concretizing thir operands. By @jimblandy in [#4850](https://github.com/gfx-rs/wgpu/pull/4850) and [#4870](https://github.com/gfx-rs/wgpu/pull/4870). +#### `dx12` and `metal` Backend Crate Features -- Naga's WGSL front and back ends now have experimental support for 64-bit floating-point literals: `1.0lf` denotes an `f64` value. There has been experimental support for an `f64` type for a while, but until now there was no syntax for writing literals with that type. As before, Naga module validation rejects `f64` values unless `naga::valid::Capabilities::FLOAT64` is requested. By @jimblandy in [#4747](https://github.com/gfx-rs/wgpu/pull/4747). +wgpu now exposes backend feature for the Direct3D 12 (`dx12`) and Metal (`metal`) backend. These are enabled by default, but don't do anything when not targeting the corresponding OS. +By @daxpedda in [#4815](https://github.com/gfx-rs/wgpu/pull/4815). -- Naga constant evaluation can now process binary operators whose operands are both vectors. By @jimblandy in [#4861](https://github.com/gfx-rs/wgpu/pull/4861). +### Direct3D 11 Backend Removal -- Add `--bulk-validate` option to Naga CLI. 
By @jimblandy in [#4871](https://github.com/gfx-rs/wgpu/pull/4871). +This backend had no functionality, and with the recent support for GL on Desktop, which allows wgpu to run on older devices, there was no need to keep this backend. +By @valaphee in [#4828](https://github.com/gfx-rs/wgpu/pull/4828). -- Naga's `cargo xtask validate` now runs validation jobs in parallel, using the [jobserver](https://crates.io/crates/jobserver) protocol to limit concurrency, and offers a `validate all` subcommand, which runs all available validation types. By @jimblandy in [#4902](https://github.com/gfx-rs/wgpu/pull/4902). +### `WGPU_ALLOW_UNDERLYING_NONCOMPLIANT_ADAPTER` Environment Variable -### Changes +This adds a way to allow a Vulkan driver which is non-compliant per `VK_KHR_driver_properties` to be enumerated. This is intended for testing new Vulkan drivers which are not Vulkan compliant yet. +By @i509VCB in [#4754](https://github.com/gfx-rs/wgpu/pull/4754). -- Arcanization of wgpu core resources: By @gents83 in [#3626](https://github.com/gfx-rs/wgpu/pull/3626) and thanks also to @jimblandy, @nical, @Wumpf, @Elabajaba & @cwfitzgerald - - Removed Token and LifeTime related management - - Removed RefCount and MultiRefCount in favour of using only Arc internal reference count - - Removing mut from resources and added instead internal members locks on demand or atomics operations - - Resources now implement Drop and destroy stuff when last Arc resources is released - - Resources hold an Arc in order to be able to implement Drop - - Resources have an utility to retrieve the id of the resource itself - - Remove all guards and just retrive the Arc needed on-demand to unlock registry of resources asap - - Verify correct resources release when unused or not needed - - Check Web and Metal compliation (thanks to @niklaskorz) - - Fix tests on all platforms - - Test a multithreaded scenario - - Storage is now holding only user-land resources, but Arc is keeping refcount for resources - - 
When user unregister a resource, it's not dropped if still in use due to refcount inside wgpu - - IdentityManager is now unique and free is called on resource drop instead of storage unregister - - Identity changes due to Arcanization and Registry being just the user reference - - Added MemLeaks test and fixing mem leaks +### `DeviceExt::create_texture_with_data` allows Mip-Major Data -#### General +Previously, `DeviceExt::create_texture_with_data` only allowed data to be provided in layer major order. There is now an `order` parameter which allows you to specify if the data is in layer major or mip major order. +```diff + let tex = ctx.device.create_texture_with_data( + &queue, + &descriptor, ++ wgpu::util::TextureDataOrder::LayerMajor, + src_data, + ); +``` -- Log vulkan validation layer messages during instance creation and destruction: By @exrook in [#4586](https://github.com/gfx-rs/wgpu/pull/4586) -- `TextureFormat::block_size` is deprecated, use `TextureFormat::block_copy_size` instead: By @wumpf in [#4647](https://github.com/gfx-rs/wgpu/pull/4647) -- Rename of `DispatchIndirect`, `DrawIndexedIndirect`, and `DrawIndirect` types in the `wgpu::util` module to `DispatchIndirectArgs`, `DrawIndexedIndirectArgs`, and `DrawIndirectArgs`. By @cwfitzgerald in [#4723](https://github.com/gfx-rs/wgpu/pull/4723). -- Make the size parameter of `encoder.clear_buffer` an `Option` instead of `Option>`. By @nical in [#4737](https://github.com/gfx-rs/wgpu/pull/4737) -- Reduce the `info` log level noise. By @nical in [#4769](https://github.com/gfx-rs/wgpu/pull/4769), [#4711](https://github.com/gfx-rs/wgpu/pull/4711) and [#4772](https://github.com/gfx-rs/wgpu/pull/4772) -- Rename `features` & `limits` fields of `DeviceDescriptor` to `required_features` & `required_limits`. By @teoxoy in [#4803](https://github.com/gfx-rs/wgpu/pull/4803) +By @cwfitzgerald in [#4780](https://github.com/gfx-rs/wgpu/pull/4780).
-#### Safe `Surface` creation +### Safe & unified Surface Creation -It is now possible to safely create a `wgpu::Surface` with `Surface::create_surface()` by letting `Surface` hold a lifetime to `window`. +It is now possible to safely create a `wgpu::Surface` with `wgpu::Instance::create_surface()` by letting `wgpu::Surface` hold a lifetime to `window`. +Passing an owned value `window` to `Surface` will return a `wgpu::Surface<'static>`. -Passing an owned value `window` to `Surface` will return a `Surface<'static>`. Shared ownership over `window` can still be achieved with e.g. an `Arc`. Alternatively a reference could be passed, which will return a `Surface<'window>`. +All possible safe variants (owned windows and web canvases) are grouped using `wgpu::SurfaceTarget`. +Conversion to `wgpu::SurfaceTarget` is automatic for any type implementing `raw-window-handle`'s `HasWindowHandle` & `HasDisplayHandle` traits, i.e. most window types. +For web canvas types this has to be done explicitly: +```rust +let surface: wgpu::Surface<'static> = instance.create_surface(wgpu::SurfaceTarget::Canvas(my_canvas))?; +``` -`Surface::create_surface_from_raw()` can be used to continue producing a `Surface<'static>` without any lifetime requirements over `window`, which also remains `unsafe`. +All unsafe variants are now grouped under `wgpu::Instance::create_surface_unsafe` which takes the +`wgpu::SurfaceTargetUnsafe` enum and always returns `wgpu::Surface<'static>`. -#### Naga +In order to create a `wgpu::Surface<'static>` without passing ownership of the window use +`wgpu::SurfaceTargetUnsafe::from_window`: +```rust +let surface = unsafe { + instance.create_surface_unsafe(wgpu::SurfaceTargetUnsafe::from_window(&my_window))? +}; +``` +The easiest way to make this code safe is to use shared ownership: +```rust +let window: Arc; +// ... +let surface = instance.create_surface(my_window.clone())?; +``` -- Remove `span` and `validate` features. 
Always fully validate shader modules, and always track source positions for use in error messages. By @teoxoy in [#4706](https://github.com/gfx-rs/wgpu/pull/4706) -- Introduce a new `Scalar` struct type for use in Naga's IR, and update all frontend, middle, and backend code appropriately. By @jimblandy in [#4673](https://github.com/gfx-rs/wgpu/pull/4673). -- Add more metal keywords. By @fornwall in [#4707](https://github.com/gfx-rs/wgpu/pull/4707). +All platform specific surface creation using pointers have moved into `SurfaceTargetUnsafe` as well. +For example: -- Add partial support for WGSL abstract types (@jimblandy in [#4743](https://github.com/gfx-rs/wgpu/pull/4743), [#4755](https://github.com/gfx-rs/wgpu/pull/4755)). +Safety by @daxpedda in [#4597](https://github.com/gfx-rs/wgpu/pull/4597) +Unification by @wumpf in [#4984](https://github.com/gfx-rs/wgpu/pull/4984) - Abstract types make numeric literals easier to use, by - automatically converting literals and other constant expressions - from abstract numeric types to concrete types when safe and - necessary. For example, to build a vector of floating-point - numbers, Naga previously made you write: +### Add partial Support for WGSL Abstract Types - vec3(1.0, 2.0, 3.0) +Abstract types make numeric literals easier to use, by +automatically converting literals and other constant expressions +from abstract numeric types to concrete types when safe and +necessary. For example, to build a vector of floating-point +numbers, Naga previously made you write: +```rust +vec3(1.0, 2.0, 3.0) +``` +With this change, you can now simply write: +```rust +vec3(1, 2, 3) +``` +Even though the literals are abstract integers, Naga recognizes +that it is safe and necessary to convert them to `f32` values in +order to build the vector.
You can also use abstract values as +initializers for global constants and global and local variables, +like this: +```rust +var unit_x: vec2 = vec2(1, 0); +``` +The literals `1` and `0` are abstract integers, and the expression +`vec2(1, 0)` is an abstract vector. However, Naga recognizes that +it can convert that to the concrete type `vec2` to satisfy +the given type of `unit_x`. +The WGSL specification permits abstract integers and +floating-point values in almost all contexts, but Naga's support +for this is still incomplete. Many WGSL operators and builtin +functions are specified to produce abstract results when applied +to abstract inputs, but for now Naga simply concretizes them all +before applying the operation. We will expand Naga's abstract type +support in subsequent pull requests. +As part of this work, the public types `naga::ScalarKind` and +`naga::Literal` now have new variants, `AbstractInt` and `AbstractFloat`. + +By @jimblandy in [#4743](https://github.com/gfx-rs/wgpu/pull/4743), [#4755](https://github.com/gfx-rs/wgpu/pull/4755). + +### `Instance::enumerate_adapters` now returns `Vec` instead of an `ExactSizeIterator` + +This allows us to support WebGPU and WebGL in the same binary. - With this change, you can now simply write: +```diff +- let adapters: Vec = instance.enumerate_adapters(wgpu::Backends::all()).collect(); ++ let adapters: Vec = instance.enumerate_adapters(wgpu::Backends::all()); +``` - vec3(1, 2, 3) +By @wumpf in [#5044](https://github.com/gfx-rs/wgpu/pull/5044) - Even though the literals are abstract integers, Naga recognizes - that it is safe and necessary to convert them to `f32` values in - order to build the vector. 
You can also use abstract values as - initializers for global constants and global and local variables, - like this: +### `device.poll()` now returns a `MaintainResult` instead of a `bool` - var unit_x: vec2 = vec2(1, 0); +This is a forward looking change, as we plan to add more information to the `MaintainResult` in the future. +This enum has the same data as the boolean, but with some useful helper functions. - The literals `1` and `0` are abstract integers, and the expression - `vec2(1, 0)` is an abstract vector. However, Naga recognizes that - it can convert that to the concrete type `vec2` to satisfy - the given type of `unit_x`. +```diff +- let queue_finished: bool = device.poll(wgpu::Maintain::Wait); ++ let queue_finished: bool = device.poll(wgpu::Maintain::Wait).is_queue_empty(); +``` - The WGSL specification permits abstract integers and - floating-point values in almost all contexts, but Naga's support - for this is still incomplete. Many WGSL operators and builtin - functions are specified to produce abstract results when applied - to abstract inputs, but for now Naga simply concretizes them all - before applying the operation. We will expand Naga's abstract type - support in subsequent pull requests. +By @cwfitzgerald in [#5053](https://github.com/gfx-rs/wgpu/pull/5053) - As part of this work, the public types `naga::ScalarKind` and - `naga::Literal` now have new variants, `AbstractInt` and `AbstractFloat`. +### New Features -- Add a new `naga::Literal` variant, `I64`, for signed 64-bit literals. [#4711](https://github.com/gfx-rs/wgpu/pull/4711) +#### General +- Added `DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW` to know if `@builtin(vertex_index)` and `@builtin(instance_index)` will respect the `first_vertex` / `first_instance` in indirect calls. If this is not present, both will always start counting from 0. Currently enabled on all backends except DX12. 
By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). +- Added support for the `FLOAT32_FILTERABLE` feature (web and native, corresponds to WebGPU's `float32-filterable`). By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759). +- GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851). +- wgpu and wgpu-core cargo feature flags are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886). +- DeviceLostClosure is guaranteed to be invoked exactly once. By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862). +- Log vulkan validation layer messages during instance creation and destruction: By @exrook in [#4586](https://github.com/gfx-rs/wgpu/pull/4586). +- `TextureFormat::block_size` is deprecated, use `TextureFormat::block_copy_size` instead: By @wumpf in [#4647](https://github.com/gfx-rs/wgpu/pull/4647). +- Rename of `DispatchIndirect`, `DrawIndexedIndirect`, and `DrawIndirect` types in the `wgpu::util` module to `DispatchIndirectArgs`, `DrawIndexedIndirectArgs`, and `DrawIndirectArgs`. By @cwfitzgerald in [#4723](https://github.com/gfx-rs/wgpu/pull/4723). +- Make the size parameter of `encoder.clear_buffer` an `Option` instead of `Option>`. By @nical in [#4737](https://github.com/gfx-rs/wgpu/pull/4737). +- Reduce the `info` log level noise. By @nical in [#4769](https://github.com/gfx-rs/wgpu/pull/4769), [#4711](https://github.com/gfx-rs/wgpu/pull/4711) and [#4772](https://github.com/gfx-rs/wgpu/pull/4772) +- Rename `features` & `limits` fields of `DeviceDescriptor` to `required_features` & `required_limits`. By @teoxoy in [#4803](https://github.com/gfx-rs/wgpu/pull/4803). +- `SurfaceConfiguration` now exposes `desired_maximum_frame_latency` which was previously hard-coded to 2. By setting it to 1 you can reduce latency under the risk of making GPU & CPU work sequential. 
Currently, on DX12 this affects the `MaximumFrameLatency`, on all other backends except OpenGL the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) -- Emit and init `struct` member padding always. By @ErichDonGubler in [#4701](https://github.com/gfx-rs/wgpu/pull/4701). +#### OpenGL +- `@builtin(instance_index)` now properly reflects the range provided in the draw call instead of always counting from 0. By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). +- Desktop GL now supports `POLYGON_MODE_LINE` and `POLYGON_MODE_POINT`. By @valaphee in [#4836](https://github.com/gfx-rs/wgpu/pull/4836). -- In WGSL output, always include the `i` suffix on `i32` literals. By @jimblandy in [#4863](https://github.com/gfx-rs/wgpu/pull/4863). +#### Naga +- Naga's WGSL front end now allows operators to produce values with abstract types, rather than concretizing their operands. By @jimblandy in [#4850](https://github.com/gfx-rs/wgpu/pull/4850) and [#4870](https://github.com/gfx-rs/wgpu/pull/4870). +- Naga's WGSL front and back ends now have experimental support for 64-bit floating-point literals: `1.0lf` denotes an `f64` value. There has been experimental support for an `f64` type for a while, but until now there was no syntax for writing literals with that type. As before, Naga module validation rejects `f64` values unless `naga::valid::Capabilities::FLOAT64` is requested. By @jimblandy in [#4747](https://github.com/gfx-rs/wgpu/pull/4747). +- Naga constant evaluation can now process binary operators whose operands are both vectors. By @jimblandy in [#4861](https://github.com/gfx-rs/wgpu/pull/4861). +- Add `--bulk-validate` option to Naga CLI. By @jimblandy in [#4871](https://github.com/gfx-rs/wgpu/pull/4871).
+- Naga's `cargo xtask validate` now runs validation jobs in parallel, using the [jobserver](https://crates.io/crates/jobserver) protocol to limit concurrency, and offers a `validate all` subcommand, which runs all available validation types. By @jimblandy in [#4902](https://github.com/gfx-rs/wgpu/pull/4902). +- Remove `span` and `validate` features. Always fully validate shader modules, and always track source positions for use in error messages. By @teoxoy in [#4706](https://github.com/gfx-rs/wgpu/pull/4706). +- Introduce a new `Scalar` struct type for use in Naga's IR, and update all frontend, middle, and backend code appropriately. By @jimblandy in [#4673](https://github.com/gfx-rs/wgpu/pull/4673). +- Add more metal keywords. By @fornwall in [#4707](https://github.com/gfx-rs/wgpu/pull/4707). +- Add a new `naga::Literal` variant, `I64`, for signed 64-bit literals. [#4711](https://github.com/gfx-rs/wgpu/pull/4711). +- Emit and init `struct` member padding always. By @ErichDonGubler in [#4701](https://github.com/gfx-rs/wgpu/pull/4701). +- In WGSL output, always include the `i` suffix on `i32` literals. By @jimblandy in [#4863](https://github.com/gfx-rs/wgpu/pull/4863). - In WGSL output, always include the `f` suffix on `f32` literals. By @jimblandy in [#4869](https://github.com/gfx-rs/wgpu/pull/4869). ### Bug Fixes #### General -- `BufferMappedRange` trait is now `WasmNotSendSync`, i.e. it is `Send`/`Sync` if not on wasm or `fragile-send-sync-non-atomic-wasm` is enabled. By @wumpf in [#4818](https://github.com/gfx-rs/wgpu/pull/4818) -- Align `wgpu_types::CompositeAlphaMode` serde serialization to spec. By @littledivy in [#4940](https://github.com/gfx-rs/wgpu/pull/4940) -- Fix error message of `ConfigureSurfaceError::TooLarge`. By @Dinnerbone in [#4960](https://github.com/gfx-rs/wgpu/pull/4960) -- Fix dropping of `DeviceLostCallbackC` params. By @bradwerth in [#5032](https://github.com/gfx-rs/wgpu/pull/5032) -- Fixed a number of panics. 
by @nical in [#4999](https://github.com/gfx-rs/wgpu/pull/4999), [#5014](https://github.com/gfx-rs/wgpu/pull/5014), [#5024](https://github.com/gfx-rs/wgpu/pull/5024), [#5025](https://github.com/gfx-rs/wgpu/pull/5025), [#5026](https://github.com/gfx-rs/wgpu/pull/5026), [#5027](https://github.com/gfx-rs/wgpu/pull/5027), [#5028](https://github.com/gfx-rs/wgpu/pull/5028) and [#5042](https://github.com/gfx-rs/wgpu/pull/5042). +- `BufferMappedRange` trait is now `WasmNotSendSync`, i.e. it is `Send`/`Sync` if not on wasm or `fragile-send-sync-non-atomic-wasm` is enabled. By @wumpf in [#4818](https://github.com/gfx-rs/wgpu/pull/4818). +- Align `wgpu_types::CompositeAlphaMode` serde serialization to spec. By @littledivy in [#4940](https://github.com/gfx-rs/wgpu/pull/4940). +- Fix error message of `ConfigureSurfaceError::TooLarge`. By @Dinnerbone in [#4960](https://github.com/gfx-rs/wgpu/pull/4960). +- Fix dropping of `DeviceLostCallbackC` params. By @bradwerth in [#5032](https://github.com/gfx-rs/wgpu/pull/5032). +- Fixed a number of panics. By @nical in [#4999](https://github.com/gfx-rs/wgpu/pull/4999), [#5014](https://github.com/gfx-rs/wgpu/pull/5014), [#5024](https://github.com/gfx-rs/wgpu/pull/5024), [#5025](https://github.com/gfx-rs/wgpu/pull/5025), [#5026](https://github.com/gfx-rs/wgpu/pull/5026), [#5027](https://github.com/gfx-rs/wgpu/pull/5027), [#5028](https://github.com/gfx-rs/wgpu/pull/5028) and [#5042](https://github.com/gfx-rs/wgpu/pull/5042). +- No longer validate surfaces against their allowed extent range on configure. This caused warnings that were almost impossible to avoid. As before, the resulting behavior depends on the compositor. By @wumpf in [#4796](https://github.com/gfx-rs/wgpu/pull/4796). #### DX12 -- Fixed D3D12_SUBRESOURCE_FOOTPRINT calculation for block compressed textures which caused a crash with `Queue::write_texture` on DX12. 
By @DTZxPorter in [#4990](https://github.com/gfx-rs/wgpu/pull/4990) +- Fixed D3D12_SUBRESOURCE_FOOTPRINT calculation for block compressed textures which caused a crash with `Queue::write_texture` on DX12. By @DTZxPorter in [#4990](https://github.com/gfx-rs/wgpu/pull/4990). #### Vulkan @@ -226,26 +314,19 @@ Passing an owned value `window` to `Surface` will return a `Surface<'static>`. S #### WebGPU -- Allow calling `BufferSlice::get_mapped_range` multiple times on the same buffer slice (instead of throwing a Javascript exception): By @DouglasDwyer in [#4726](https://github.com/gfx-rs/wgpu/pull/4726) +- Allow calling `BufferSlice::get_mapped_range` multiple times on the same buffer slice (instead of throwing a Javascript exception). By @DouglasDwyer in [#4726](https://github.com/gfx-rs/wgpu/pull/4726). #### WGL -- Create a hidden window per `wgpu::Instance` instead of sharing a global one. +- Create a hidden window per `wgpu::Instance` instead of sharing a global one. By @Zoxc in [#4603](https://github.com/gfx-rs/wgpu/issues/4603) #### Naga - Make module compaction preserve the module's named types, even if they are unused. By @jimblandy in [#4734](https://github.com/gfx-rs/wgpu/pull/4734). - - Improve algorithm used by module compaction. By @jimblandy in [#4662](https://github.com/gfx-rs/wgpu/pull/4662). - - When reading GLSL, fix the argument types of the double-precision floating-point overloads of the `dot`, `reflect`, `distance`, and `ldexp` builtin functions. Correct the WGSL generated for constructing 64-bit floating-point matrices. Add tests for all the above. By @jimblandy in [#4684](https://github.com/gfx-rs/wgpu/pull/4684). - - Allow Naga's IR types to represent matrices with elements elements of any scalar kind. This makes it possible for Naga IR types to represent WGSL abstract matrices. By @jimblandy in [#4735](https://github.com/gfx-rs/wgpu/pull/4735). 
- -- When evaluating const-expressions and generating SPIR-V, properly handle `Compose` expressions whose operands are `Splat` expressions. Such expressions are created and marked as constant by the constant evaluator. By @jimblandy in [#4695](https://github.com/gfx-rs/wgpu/pull/4695). - - Preserve the source spans for constants and expressions correctly across module compaction. By @jimblandy in [#4696](https://github.com/gfx-rs/wgpu/pull/4696). - - Record the names of WGSL `alias` declarations in Naga IR `Type`s. By @jimblandy in [#4733](https://github.com/gfx-rs/wgpu/pull/4733). #### Metal @@ -254,9 +335,18 @@ Passing an owned value `window` to `Surface` will return a `Surface<'static>`. S ### Examples -- remove winit dependency from hello-compute example by @psvri in [#4699](https://github.com/gfx-rs/wgpu/pull/4699) -- hello-compute example fix failure with "wgpu error: Validation Error" if arguments are missing by @vilcans in [#4939](https://github.com/gfx-rs/wgpu/pull/4939) -- Made the examples page not crash on Chrome on Android, and responsive to screen sizes by @Dinnerbone in [#4958](https://github.com/gfx-rs/wgpu/pull/4958) +- remove winit dependency from hello-compute example. By @psvri in [#4699](https://github.com/gfx-rs/wgpu/pull/4699) +- hello-compute example fix failure with `wgpu error: Validation Error` if arguments are missing. By @vilcans in [#4939](https://github.com/gfx-rs/wgpu/pull/4939). +- Made the examples page not crash on Chrome on Android, and responsive to screen sizes. By @Dinnerbone in [#4958](https://github.com/gfx-rs/wgpu/pull/4958). + +## v0.18.2 (2023-12-06) + +This release includes `naga` version 0.14.2. The crates `wgpu-core`, `wgpu-hal` are still at `0.18.1` and the crates `wgpu` and `wgpu-types` are still at `0.18.0`. + +### Bug Fixes + +#### Naga +- When evaluating const-expressions and generating SPIR-V, properly handle `Compose` expressions whose operands are `Splat` expressions. 
Such expressions are created and marked as constant by the constant evaluator. By @jimblandy in [#4695](https://github.com/gfx-rs/wgpu/pull/4695). ## v0.18.1 (2023-11-15) diff --git a/Cargo.lock b/Cargo.lock index 0d317e1476b..c493eb6a888 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -68,7 +68,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39b801912a977c3fd52d80511fe1c0c8480c6f957f21ae2ce1b92ffe970cf4b9" dependencies = [ "android-properties", - "bitflags 2.4.1", + "bitflags 2.4.2", "cc", "cesu8", "jni", @@ -105,9 +105,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.7" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd2405b3ac1faab2990b74d728624cd9fd115651fcecc7c2d8daf01376275ba" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -319,9 +319,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" dependencies = [ "arbitrary", "serde", @@ -410,7 +410,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b50b5a44d59a98c55a9eeb518f39bf7499ba19fd98ee7d22618687f3f10adbf" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "log", "polling", "rustix", @@ -502,9 +502,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.16" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58e54881c004cec7895b0068a0a954cd5d62da01aef83fa35b1e594497bf5445" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" 
dependencies = [ "clap_builder", "clap_derive", @@ -512,9 +512,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.16" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59cb82d7f531603d2fd1f507441cdd35184fa81beff7bd489570de7f773460bb" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" dependencies = [ "anstream", "anstyle", @@ -895,9 +895,9 @@ checksum = "96a6ac251f4a2aca6b3f91340350eab87ae57c3f127ffeb585e92bd336717991" [[package]] name = "d3d12" -version = "0.7.0" +version = "0.19.0" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "libloading 0.8.1", "winapi", ] @@ -1177,9 +1177,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95b3f3e67048839cb0d0781f445682a35113da7121f7c949db0e2be96a4fbece" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" dependencies = [ "humantime", "is-terminal", @@ -1222,9 +1222,9 @@ checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "fdeflate" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "209098dd6dfc4445aa6111f0e98653ac323eaa4dfd212c9ca3931bf9955c31bd" +checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645" dependencies = [ "simd-adler32", ] @@ -1505,8 +1505,9 @@ checksum = "151665d9be52f9bb40fc7966565d39666f2d1e69233571b71b87791c7e0528b3" [[package]] name = "glow" -version = "0.13.0" -source = "git+https://github.com/grovesNL/glow.git?rev=29ff917a2b2ff7ce0a81b2cc5681de6d4735b36e#29ff917a2b2ff7ce0a81b2cc5681de6d4735b36e" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd348e04c43b32574f2de31c8bb397d96c9fcfa1371bd4ca6d8bdc464ab121b1" dependencies = [ "js-sys", "slotmap", @@ -1594,7 +1595,7 @@ version = 
"0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "gpu-alloc-types", ] @@ -1604,7 +1605,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", ] [[package]] @@ -1626,7 +1627,7 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc11df1ace8e7e564511f53af41f3e42ddc95b56fd07b3f4445d2a6048bc682c" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "gpu-descriptor-types", "hashbrown", ] @@ -1637,7 +1638,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6bf0b36e6f090b7e1d8a4b49c0cb81c1f8376f72198c65dd3ad9ff3556b8b78c" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", ] [[package]] @@ -1662,7 +1663,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af2a7e73e1f34c48da31fb668a907f250794837e08faa144fd24f0b8b741e890" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "com", "libc", "libloading 0.8.1", @@ -1679,9 +1680,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" [[package]] name = "hexf-parse" @@ -1923,7 +1924,7 @@ version = "0.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3af92c55d7d839293953fcd0fda5ecfe93297cfde6ffbdec13b41d99c0ba6607" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "libc", "redox_syscall 0.4.1", ] @@ 
-1941,9 +1942,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -2018,7 +2019,7 @@ version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "block", "core-graphics-types", "foreign-types 0.5.0", @@ -2051,12 +2052,13 @@ dependencies = [ [[package]] name = "naga" -version = "0.14.2" +version = "0.19.0" dependencies = [ "arbitrary", + "arrayvec 0.7.4", "bincode", "bit-set", - "bitflags 2.4.1", + "bitflags 2.4.2", "codespan-reporting", "criterion", "diff", @@ -2080,7 +2082,7 @@ dependencies = [ [[package]] name = "naga-cli" -version = "0.14.0" +version = "0.19.0" dependencies = [ "argh", "bincode", @@ -2143,7 +2145,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "jni-sys", "log", "ndk-sys 0.5.0+25.2.9519653", @@ -2548,13 +2550,13 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "player" -version = "0.18.0" +version = "0.19.0" dependencies = [ "env_logger", "log", @@ -2563,7 +2565,7 @@ dependencies = [ "serde", "wgpu-core", "wgpu-types", - "winit 0.29.9", + "winit 0.29.10", ] [[package]] @@ -2687,9 
+2689,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.76" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -2771,9 +2773,9 @@ checksum = "42a9830a0e1b9fb145ebb365b8bc4ccd75f290f98c0247deafbbe2c75cefb544" [[package]] name = "rayon" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" dependencies = [ "either", "rayon-core", @@ -2781,9 +2783,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -2809,9 +2811,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", @@ -2821,9 +2823,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" dependencies = [ "aho-corasick", "memchr", @@ -2849,7 +2851,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64", - "bitflags 2.4.1", + "bitflags 2.4.2", "serde", "serde_derive", ] @@ -2899,7 +2901,7 @@ version = "0.38.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", @@ -3107,9 +3109,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "smithay-client-toolkit" @@ -3136,7 +3138,7 @@ version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60e3d9941fa3bacf7c2bf4b065304faa14164151254cd16ce1b1bc8fc381600f" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "calloop 0.12.3", "calloop-wayland-source", "cursor-icon", @@ -3157,9 +3159,9 @@ dependencies = [ [[package]] name = "smol_str" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74212e6bbe9a4352329b2f68ba3130c15a3f26fe88ff22dbdc6cdd58fa85e99c" +checksum = "e6845563ada680337a52d43bb0b29f396f2d911616f6573012645b9e3d048a49" dependencies = [ "serde", ] @@ -3213,7 +3215,7 @@ version = "0.3.0+sdk-1.3.268.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "serde", ] @@ -3525,9 +3527,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-id" @@ -3601,9 +3603,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom", "serde", @@ -3782,7 +3784,7 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ca7d52347346f5473bf2f56705f360e8440873052e575e55890c4fa57843ed3" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "nix 0.26.4", "wayland-backend", "wayland-scanner 0.31.0", @@ -3806,7 +3808,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "625c5029dbd43d25e6aa9615e88b829a5cad13b2819c4ae129fdbb7c31ab4c7e" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "cursor-icon", "wayland-backend", ] @@ -3861,7 +3863,7 @@ version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e253d7107ba913923dc253967f35e8561a3c65f914543e46843c88ddd729e21c" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "wayland-backend", "wayland-client 0.31.1", "wayland-scanner 0.31.0", @@ -3873,7 +3875,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23803551115ff9ea9bce586860c5c5a971e360825a0309264102a9495a5ff479" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "wayland-backend", "wayland-client 0.31.1", "wayland-protocols 0.31.0", @@ -3886,7 +3888,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad1f61b76b6c2d8742e10f9ba5c3737f6530b4c243132c2a2ccc8aa96fe25cd6" dependencies 
= [ - "bitflags 2.4.1", + "bitflags 2.4.2", "wayland-backend", "wayland-client 0.31.1", "wayland-protocols 0.31.0", @@ -3960,7 +3962,7 @@ dependencies = [ [[package]] name = "wgpu" -version = "0.18.0" +version = "0.19.0" dependencies = [ "arrayvec 0.7.4", "cfg-if", @@ -3984,11 +3986,11 @@ dependencies = [ [[package]] name = "wgpu-core" -version = "0.18.0" +version = "0.19.0" dependencies = [ "arrayvec 0.7.4", "bit-vec", - "bitflags 2.4.1", + "bitflags 2.4.2", "cfg_aliases", "codespan-reporting", "indexmap", @@ -4010,7 +4012,7 @@ dependencies = [ [[package]] name = "wgpu-examples" -version = "0.18.0" +version = "0.19.0" dependencies = [ "bytemuck", "cfg-if", @@ -4038,18 +4040,18 @@ dependencies = [ "wgpu", "wgpu-hal", "wgpu-test", - "winit 0.29.9", + "winit 0.29.10", ] [[package]] name = "wgpu-hal" -version = "0.18.0" +version = "0.19.0" dependencies = [ "android_system_properties", "arrayvec 0.7.4", "ash", "bit-set", - "bitflags 2.4.1", + "bitflags 2.4.2", "block", "cfg-if", "cfg_aliases", @@ -4085,15 +4087,15 @@ dependencies = [ "web-sys", "wgpu-types", "winapi", - "winit 0.29.9", + "winit 0.29.10", ] [[package]] name = "wgpu-info" -version = "0.18.0" +version = "0.19.0" dependencies = [ "anyhow", - "bitflags 2.4.1", + "bitflags 2.4.2", "env_logger", "pico-args", "serde", @@ -4104,7 +4106,7 @@ dependencies = [ [[package]] name = "wgpu-macros" -version = "0.18.0" +version = "0.19.0" dependencies = [ "heck", "quote", @@ -4113,11 +4115,11 @@ dependencies = [ [[package]] name = "wgpu-test" -version = "0.18.0" +version = "0.19.0" dependencies = [ "anyhow", "arrayvec 0.7.4", - "bitflags 2.4.1", + "bitflags 2.4.2", "bytemuck", "cfg-if", "console_log", @@ -4149,9 +4151,9 @@ dependencies = [ [[package]] name = "wgpu-types" -version = "0.18.0" +version = "0.19.0" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "js-sys", "serde", "serde_json", @@ -4502,14 +4504,14 @@ dependencies = [ [[package]] name = "winit" -version = "0.29.9" +version = "0.29.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2376dab13e09c01ad8b679f0dbc7038af4ec43d9a91344338e37bd686481550" +checksum = "4c824f11941eeae66ec71111cc2674373c772f482b58939bb4066b642aa2ffcf" dependencies = [ "ahash", "android-activity", "atomic-waker", - "bitflags 2.4.1", + "bitflags 2.4.2", "bytemuck", "calloop 0.12.3", "cfg_aliases", @@ -4610,7 +4612,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6924668544c48c0133152e7eec86d644a056ca3d09275eb8d5cdb9855f9d8699" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "dlib", "log", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 8c9c056803e..bde4e8451a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,27 +45,27 @@ keywords = ["graphics"] license = "MIT OR Apache-2.0" homepage = "https://wgpu.rs/" repository = "https://github.com/gfx-rs/wgpu" -version = "0.18.0" +version = "0.19.0" authors = ["gfx-rs developers"] [workspace.dependencies.wgc] package = "wgpu-core" path = "./wgpu-core" -version = "0.18.0" +version = "0.19.0" [workspace.dependencies.wgt] package = "wgpu-types" path = "./wgpu-types" -version = "0.18.0" +version = "0.19.0" [workspace.dependencies.hal] package = "wgpu-hal" path = "./wgpu-hal" -version = "0.18.0" +version = "0.19.0" [workspace.dependencies.naga] path = "./naga" -version = "0.14.0" +version = "0.19.0" [workspace.dependencies] anyhow = "1.0" @@ -117,12 +117,12 @@ serde_json = "1.0.111" smallvec = "1" static_assertions = "1.1.0" thiserror = "1" -wgpu = { version = "0.18.0", path = "./wgpu" } -wgpu-core = { version = "0.18.0", path = "./wgpu-core" } -wgpu-example = { version = "0.18.0", path = "./examples/common" } -wgpu-macros = { version = "0.18.0", path = "./wgpu-macros" } -wgpu-test = { version = "0.18.0", path = "./tests" } -wgpu-types = { version = "0.18.0", path = "./wgpu-types" } +wgpu = { version = "0.19.0", path = "./wgpu" } +wgpu-core = { version = "0.19.0", path = "./wgpu-core" } +wgpu-example = { 
version = "0.19.0", path = "./examples/common" } +wgpu-macros = { version = "0.19.0", path = "./wgpu-macros" } +wgpu-test = { version = "0.19.0", path = "./tests" } +wgpu-types = { version = "0.19.0", path = "./wgpu-types" } winit = { version = "0.29", features = ["android-native-activity"] } # Metal dependencies @@ -150,7 +150,7 @@ hassle-rs = "0.11.0" # Gles dependencies khronos-egl = "6" -glow = "0.12.3" +glow = "0.13.1" glutin = "0.29.1" # wasm32 dependencies diff --git a/d3d12/Cargo.toml b/d3d12/Cargo.toml index 91f35429488..10c68eab775 100644 --- a/d3d12/Cargo.toml +++ b/d3d12/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "d3d12" -version = "0.7.0" +version = "0.19.0" authors = ["gfx-rs developers"] description = "Low level D3D12 API wrapper" repository = "https://github.com/gfx-rs/wgpu/tree/trunk/d3d12" diff --git a/deno_webgpu/Cargo.toml b/deno_webgpu/Cargo.toml index b9d281f04d5..d1097c658d2 100644 --- a/deno_webgpu/Cargo.toml +++ b/deno_webgpu/Cargo.toml @@ -40,7 +40,7 @@ workspace = true features = ["dx12"] [target.'cfg(windows)'.dependencies.wgpu-hal] -version = "0.18.0" +version = "0.19.0" path = "../wgpu-hal" features = ["windows_rs"] diff --git a/examples/src/framework.rs b/examples/src/framework.rs index dd2c2ee6d10..a22f9c11134 100644 --- a/examples/src/framework.rs +++ b/examples/src/framework.rs @@ -571,6 +571,7 @@ impl From> format, width: params.width, height: params.height, + desired_maximum_frame_latency: 2, present_mode: wgpu::PresentMode::Fifo, alpha_mode: wgpu::CompositeAlphaMode::Auto, view_formats: vec![format], diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index 40cb805c28b..faa1db8f8b2 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -72,16 +72,9 @@ async fn run(event_loop: EventLoop<()>, window: Window) { multiview: None, }); - let mut config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: swapchain_format, - 
width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: swapchain_capabilities.alpha_modes[0], - view_formats: vec![], - }; - + let mut config = surface + .get_default_config(&adapter, size.width, size.height) + .unwrap(); surface.configure(&device, &config); let window = &window; diff --git a/examples/src/hello_windows/mod.rs b/examples/src/hello_windows/mod.rs index 9a42b9afbd7..7d81dbef7b1 100644 --- a/examples/src/hello_windows/mod.rs +++ b/examples/src/hello_windows/mod.rs @@ -30,20 +30,11 @@ impl ViewportDesc { fn build(self, adapter: &wgpu::Adapter, device: &wgpu::Device) -> Viewport { let size = self.window.inner_size(); - - let caps = self.surface.get_capabilities(adapter); - let config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: caps.formats[0], - width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: caps.alpha_modes[0], - view_formats: vec![], - }; - + let config = self + .surface + .get_default_config(adapter, size.width, size.height) + .unwrap(); self.surface.configure(device, &config); - Viewport { desc: self, config } } } diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index de71ce50671..4a31ddc0693 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -192,15 +192,9 @@ impl WgpuContext { multiview: None, }); - let surface_config = wgpu::SurfaceConfiguration { - usage: wgpu::TextureUsages::RENDER_ATTACHMENT, - format: swapchain_format, - width: size.width, - height: size.height, - present_mode: wgpu::PresentMode::Fifo, - alpha_mode: swapchain_capabilities.alpha_modes[0], - view_formats: vec![], - }; + let surface_config = surface + .get_default_config(&adapter, size.width, size.height) + .unwrap(); surface.configure(&device, &surface_config); // (5) diff --git a/naga-cli/Cargo.toml b/naga-cli/Cargo.toml index 7b8c3024f12..9fe22e34615 100644 --- 
a/naga-cli/Cargo.toml +++ b/naga-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "naga-cli" -version = "0.14.0" +version = "0.19.0" authors = ["gfx-rs developers"] edition = "2021" description = "Shader translation command line tool" @@ -25,7 +25,7 @@ env_logger = "0.10" argh = "0.1.5" [dependencies.naga] -version = "0.14" +version = "0.19" path = "../naga" features = [ "compact", diff --git a/naga/Cargo.toml b/naga/Cargo.toml index 094c4779a59..4435a6f2111 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "naga" -version = "0.14.2" +version = "0.19.0" authors = ["gfx-rs developers"] edition = "2021" description = "Shader translation infrastructure" @@ -42,7 +42,7 @@ harness = false [dependencies] arbitrary = { version = "1.3", features = ["derive"], optional = true } -bitflags = "2.2" +bitflags = "2.4" bit-set = "0.5" termcolor = { version = "1.4.1" } # remove termcolor dep when updating to the next version of codespan-reporting @@ -60,6 +60,7 @@ petgraph = { version = "0.6", optional = true } pp-rs = { version = "0.2.1", optional = true } hexf-parse = { version = "0.2.1", optional = true } unicode-xid = { version = "0.2.3", optional = true } +arrayvec.workspace = true [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] criterion = { version = "0.5", features = [] } @@ -68,7 +69,10 @@ criterion = { version = "0.5", features = [] } bincode = "1" diff = "0.1" env_logger = "0.10" -hlsl-snapshots = { version = "0.1.0", path = "./hlsl-snapshots" } +# This _cannot_ have a version specified. If it does, crates.io will look +# for a version of the package on crates when we publish naga. Path dependencies +# are allowed through though. 
+hlsl-snapshots = { path = "./hlsl-snapshots" } # Require at least version 0.7.1 of ron, this version changed how floating points are # serialized by forcing them to always have the decimal part, this makes it backwards # incompatible with our tests because we do a syntatic diff and not a semantic one. diff --git a/naga/fuzz/Cargo.toml b/naga/fuzz/Cargo.toml index 4285142c06a..1f1c1814ba5 100644 --- a/naga/fuzz/Cargo.toml +++ b/naga/fuzz/Cargo.toml @@ -15,7 +15,7 @@ libfuzzer-sys = "0.4" [target.'cfg(not(any(target_arch = "wasm32", target_os = "ios")))'.dependencies.naga] path = ".." -version = "0.14.0" +version = "0.19.0" features = ["arbitrary", "spv-in", "wgsl-in", "glsl-in"] [[bin]] diff --git a/naga/src/lib.rs b/naga/src/lib.rs index b27ebc67647..bfd8359d881 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -458,6 +458,10 @@ pub enum VectorSize { Quad = 4, } +impl VectorSize { + const MAX: usize = Self::Quad as u8 as usize; +} + /// Primitive type for a scalar. #[repr(u8)] #[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)] diff --git a/naga/src/proc/constant_evaluator.rs b/naga/src/proc/constant_evaluator.rs index 82efeece4e4..a84863066ea 100644 --- a/naga/src/proc/constant_evaluator.rs +++ b/naga/src/proc/constant_evaluator.rs @@ -1,9 +1,227 @@ +use std::iter; + +use arrayvec::ArrayVec; + use crate::{ arena::{Arena, Handle, UniqueArena}, ArraySize, BinaryOperator, Constant, Expression, Literal, ScalarKind, Span, Type, TypeInner, UnaryOperator, }; +/// A macro that allows dollar signs (`$`) to be emitted by other macros. Useful for generating +/// `macro_rules!` items that, in turn, emit their own `macro_rules!` items. +/// +/// Technique stolen directly from +/// . +macro_rules! with_dollar_sign { + ($($body:tt)*) => { + macro_rules! __with_dollar_sign { $($body)* } + __with_dollar_sign!($); + } +} + +macro_rules! 
gen_component_wise_extractor { + ( + $ident:ident -> $target:ident, + literals: [$( $literal:ident => $mapping:ident: $ty:ident ),+ $(,)?], + scalar_kinds: [$( $scalar_kind:ident ),* $(,)?], + ) => { + /// A subset of [`Literal`]s intended to be used for implementing numeric built-ins. + enum $target { + $( + #[doc = concat!( + "Maps to [`Literal::", + stringify!($mapping), + "`]", + )] + $mapping([$ty; N]), + )+ + } + + impl From<$target<1>> for Expression { + fn from(value: $target<1>) -> Self { + match value { + $( + $target::$mapping([value]) => { + Expression::Literal(Literal::$literal(value)) + } + )+ + } + } + } + + #[doc = concat!( + "Attempts to evaluate multiple `exprs` as a combined [`", + stringify!($target), + "`] to pass to `handler`. ", + )] + /// If `exprs` are vectors of the same length, `handler` is called for each corresponding + /// component of each vector. + /// + /// `handler`'s output is registered as a new expression. If `exprs` are vectors of the + /// same length, a new vector expression is registered, composed of each component emitted + /// by `handler`. + fn $ident( + eval: &mut ConstantEvaluator<'_>, + span: Span, + exprs: [Handle; N], + mut handler: F, + ) -> Result, ConstantEvaluatorError> + where + $target: Into, + F: FnMut($target) -> Result<$target, ConstantEvaluatorError> + Clone, + { + assert!(N > 0); + let err = ConstantEvaluatorError::InvalidMathArg; + let mut exprs = exprs.into_iter(); + + macro_rules! sanitize { + ($expr:expr) => { + eval.eval_zero_value_and_splat($expr, span) + .map(|expr| &eval.expressions[expr]) + }; + } + + let new_expr = match sanitize!(exprs.next().unwrap())? 
{ + $( + &Expression::Literal(Literal::$literal(x)) => iter::once(Ok(x)) + .chain(exprs.map(|expr| { + sanitize!(expr).and_then(|expr| match expr { + &Expression::Literal(Literal::$literal(x)) => Ok(x), + _ => Err(err.clone()), + }) + })) + .collect::, _>>() + .map(|a| a.into_inner().unwrap()) + .map($target::$mapping) + .and_then(|comps| Ok(handler(comps)?.into())), + )+ + &Expression::Compose { ty, ref components } => match &eval.types[ty].inner { + &TypeInner::Vector { size, scalar } => match scalar.kind { + $(ScalarKind::$scalar_kind)|* => { + let first_ty = ty; + let mut component_groups = + ArrayVec::, N>::new(); + component_groups.push(crate::proc::flatten_compose( + first_ty, + components, + eval.expressions, + eval.types, + ).collect()); + component_groups.extend( + exprs + .map(|expr| { + sanitize!(expr).and_then(|expr| match expr { + &Expression::Compose { ty, ref components } + if &eval.types[ty].inner + == &eval.types[first_ty].inner => + { + Ok(crate::proc::flatten_compose( + ty, + components, + eval.expressions, + eval.types, + ).collect()) + } + _ => Err(err.clone()), + }) + }) + .collect::, _>>( + )?, + ); + let component_groups = component_groups.into_inner().unwrap(); + let mut new_components = + ArrayVec::<_, { crate::VectorSize::MAX }>::new(); + for idx in 0..(size as u8).into() { + let group = component_groups + .iter() + .map(|cs| cs[idx]) + .collect::>() + .into_inner() + .unwrap(); + new_components.push($ident( + eval, + span, + group, + handler.clone(), + )?); + } + Ok(Expression::Compose { + ty: first_ty, + components: new_components.into_iter().collect(), + }) + } + _ => return Err(err), + }, + _ => return Err(err), + }, + _ => return Err(err), + }?; + eval.register_evaluated_expr(new_expr, span) + } + + with_dollar_sign! 
{ + ($d:tt) => { + #[allow(unused)] + #[doc = concat!( + "A convenience macro for using the same RHS for each [`", + stringify!($target), + "`] variant in a call to [`", + stringify!($ident), + "`].", + )] + macro_rules! $ident { + ( + $eval:expr, + $span:expr, + [$d ($d expr:expr),+ $d (,)?], + |$d ($d arg:ident),+| $d tt:tt + ) => { + $ident($eval, $span, [$d ($d expr),+], |args| match args { + $( + $target::$mapping([$d ($d arg),+]) => { + let res = $d tt; + Result::map(res, $target::$mapping) + }, + )+ + }) + }; + } + }; + } + }; +} + +gen_component_wise_extractor! { + component_wise_scalar -> Scalar, + literals: [ + AbstractFloat => AbstractFloat: f64, + F32 => F32: f32, + AbstractInt => AbstractInt: i64, + U32 => U32: u32, + I32 => I32: i32, + ], + scalar_kinds: [ + Float, + AbstractFloat, + Sint, + Uint, + AbstractInt, + ], +} + +gen_component_wise_extractor! { + component_wise_float -> Float, + literals: [ + AbstractFloat => Abstract: f64, + F32 => F32: f32, + ], + scalar_kinds: [ + Float, + AbstractFloat, + ], +} + #[derive(Debug)] enum Behavior { Wgsl, @@ -592,186 +810,111 @@ impl<'a> ConstantEvaluator<'a> { } match fun { - crate::MathFunction::Pow => self.math_pow(arg, arg1.unwrap(), span), - crate::MathFunction::Clamp => self.math_clamp(arg, arg1.unwrap(), arg2.unwrap(), span), - fun => Err(ConstantEvaluatorError::NotImplemented(format!( - "{fun:?} built-in function" - ))), - } - } - - fn math_pow( - &mut self, - e1: Handle, - e2: Handle, - span: Span, - ) -> Result, ConstantEvaluatorError> { - let e1 = self.eval_zero_value_and_splat(e1, span)?; - let e2 = self.eval_zero_value_and_splat(e2, span)?; - - let expr = match (&self.expressions[e1], &self.expressions[e2]) { - (&Expression::Literal(Literal::F32(a)), &Expression::Literal(Literal::F32(b))) => { - Expression::Literal(Literal::F32(a.powf(b))) + crate::MathFunction::Abs => { + component_wise_scalar(self, span, [arg], |args| match args { + Scalar::AbstractFloat([e]) => 
Ok(Scalar::AbstractFloat([e.abs()])), + Scalar::F32([e]) => Ok(Scalar::F32([e.abs()])), + Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])), + Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])), + Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz + }) } - ( - &Expression::Compose { - components: ref src_components0, - ty: ty0, - }, - &Expression::Compose { - components: ref src_components1, - ty: ty1, - }, - ) if ty0 == ty1 - && matches!( - self.types[ty0].inner, - crate::TypeInner::Vector { - scalar: crate::Scalar { - kind: ScalarKind::Float, - .. - }, - .. - } - ) => - { - let mut components: Vec<_> = crate::proc::flatten_compose( - ty0, - src_components0, - self.expressions, - self.types, - ) - .chain(crate::proc::flatten_compose( - ty1, - src_components1, - self.expressions, - self.types, - )) - .collect(); - - let mid = components.len() / 2; - let (first, last) = components.split_at_mut(mid); - for (a, b) in first.iter_mut().zip(&*last) { - *a = self.math_pow(*a, *b, span)?; - } - components.truncate(mid); - - Expression::Compose { - ty: ty0, - components, - } + crate::MathFunction::Acos => { + component_wise_float!(self, span, [arg], |e| { Ok([e.acos()]) }) } - _ => return Err(ConstantEvaluatorError::InvalidMathArg), - }; - - self.register_evaluated_expr(expr, span) - } - - fn math_clamp( - &mut self, - e: Handle, - low: Handle, - high: Handle, - span: Span, - ) -> Result, ConstantEvaluatorError> { - let e = self.eval_zero_value_and_splat(e, span)?; - let low = self.eval_zero_value_and_splat(low, span)?; - let high = self.eval_zero_value_and_splat(high, span)?; - - let expr = match ( - &self.expressions[e], - &self.expressions[low], - &self.expressions[high], - ) { - (&Expression::Literal(e), &Expression::Literal(low), &Expression::Literal(high)) => { - let literal = match (e, low, high) { - (Literal::I32(e), Literal::I32(low), Literal::I32(high)) => { - if low > high { - return 
Err(ConstantEvaluatorError::InvalidClamp); - } else { - Literal::I32(e.clamp(low, high)) - } - } - (Literal::U32(e), Literal::U32(low), Literal::U32(high)) => { + crate::MathFunction::Acosh => { + component_wise_float!(self, span, [arg], |e| { Ok([e.acosh()]) }) + } + crate::MathFunction::Asin => { + component_wise_float!(self, span, [arg], |e| { Ok([e.asin()]) }) + } + crate::MathFunction::Asinh => { + component_wise_float!(self, span, [arg], |e| { Ok([e.asinh()]) }) + } + crate::MathFunction::Atan => { + component_wise_float!(self, span, [arg], |e| { Ok([e.atan()]) }) + } + crate::MathFunction::Atanh => { + component_wise_float!(self, span, [arg], |e| { Ok([e.atanh()]) }) + } + crate::MathFunction::Pow => { + component_wise_float!(self, span, [arg, arg1.unwrap()], |e1, e2| { + Ok([e1.powf(e2)]) + }) + } + crate::MathFunction::Clamp => { + component_wise_scalar!( + self, + span, + [arg, arg1.unwrap(), arg2.unwrap()], + |e, low, high| { if low > high { - return Err(ConstantEvaluatorError::InvalidClamp); + Err(ConstantEvaluatorError::InvalidClamp) } else { - Literal::U32(e.clamp(low, high)) + Ok([e.clamp(low, high)]) } } - (Literal::F32(e), Literal::F32(low), Literal::F32(high)) => { - if low > high { - return Err(ConstantEvaluatorError::InvalidClamp); + ) + } + crate::MathFunction::Cos => { + component_wise_float!(self, span, [arg], |e| { Ok([e.cos()]) }) + } + crate::MathFunction::Cosh => { + component_wise_float!(self, span, [arg], |e| { Ok([e.cosh()]) }) + } + crate::MathFunction::Round => { + // TODO: Use `f{32,64}.round_ties_even()` when available on stable. This polyfill + // is shamelessly [~~stolen from~~ inspired by `ndarray-image`][polyfill source], + // which has licensing compatible with ours. See also + // . 
+ // + // [polyfill source]: https://github.com/imeka/ndarray-ndimage/blob/8b14b4d6ecfbc96a8a052f802e342a7049c68d8f/src/lib.rs#L98 + fn round_ties_even(x: f64) -> f64 { + let i = x as i64; + let f = (x - i as f64).abs(); + if f == 0.5 { + if i & 1 == 1 { + // -1.5, 1.5, 3.5, ... + (x.abs() + 0.5).copysign(x) } else { - Literal::F32(e.clamp(low, high)) + (x.abs() - 0.5).copysign(x) } + } else { + x.round() } - _ => return Err(ConstantEvaluatorError::InvalidMathArg), - }; - Expression::Literal(literal) - } - ( - &Expression::Compose { - components: ref src_components0, - ty: ty0, - }, - &Expression::Compose { - components: ref src_components1, - ty: ty1, - }, - &Expression::Compose { - components: ref src_components2, - ty: ty2, - }, - ) if ty0 == ty1 - && ty0 == ty2 - && matches!( - self.types[ty0].inner, - crate::TypeInner::Vector { - scalar: crate::Scalar { - kind: ScalarKind::Float, - .. - }, - .. - } - ) => - { - let mut components: Vec<_> = crate::proc::flatten_compose( - ty0, - src_components0, - self.expressions, - self.types, - ) - .chain(crate::proc::flatten_compose( - ty1, - src_components1, - self.expressions, - self.types, - )) - .chain(crate::proc::flatten_compose( - ty2, - src_components2, - self.expressions, - self.types, - )) - .collect(); - - let chunk_size = components.len() / 3; - let (es, rem) = components.split_at_mut(chunk_size); - let (lows, highs) = rem.split_at(chunk_size); - for ((e, low), high) in es.iter_mut().zip(lows).zip(highs) { - *e = self.math_clamp(*e, *low, *high, span)?; - } - components.truncate(chunk_size); - - Expression::Compose { - ty: ty0, - components, } + component_wise_float(self, span, [arg], |e| match e { + Float::Abstract([e]) => Ok(Float::Abstract([round_ties_even(e)])), + Float::F32([e]) => Ok(Float::F32([(round_ties_even(e as f64) as f32)])), + }) } - _ => return Err(ConstantEvaluatorError::InvalidMathArg), - }; - - self.register_evaluated_expr(expr, span) + crate::MathFunction::Saturate => { + 
component_wise_float!(self, span, [arg], |e| { Ok([e.clamp(0., 1.)]) }) + } + crate::MathFunction::Sin => { + component_wise_float!(self, span, [arg], |e| { Ok([e.sin()]) }) + } + crate::MathFunction::Sinh => { + component_wise_float!(self, span, [arg], |e| { Ok([e.sinh()]) }) + } + crate::MathFunction::Tan => { + component_wise_float!(self, span, [arg], |e| { Ok([e.tan()]) }) + } + crate::MathFunction::Tanh => { + component_wise_float!(self, span, [arg], |e| { Ok([e.tanh()]) }) + } + crate::MathFunction::Sqrt => { + component_wise_float!(self, span, [arg], |e| { Ok([e.sqrt()]) }) + } + crate::MathFunction::Step => { + component_wise_float!(self, span, [arg, arg1.unwrap()], |edge, x| { + Ok([if edge <= x { 1.0 } else { 0.0 }]) + }) + } + fun => Err(ConstantEvaluatorError::NotImplemented(format!( + "{fun:?} built-in function" + ))), + } } fn array_length( @@ -1144,7 +1287,12 @@ impl<'a> ConstantEvaluator<'a> { return self.cast(expr, target, span); }; - let crate::TypeInner::Array { base: _, size, stride: _ } = self.types[ty].inner else { + let crate::TypeInner::Array { + base: _, + size, + stride: _, + } = self.types[ty].inner + else { return self.cast(expr, target, span); }; diff --git a/naga/tests/out/glsl/math-functions.main.Fragment.glsl b/naga/tests/out/glsl/math-functions.main.Fragment.glsl index ed81535ab5f..bf0561f12e9 100644 --- a/naga/tests/out/glsl/math-functions.main.Fragment.glsl +++ b/naga/tests/out/glsl/math-functions.main.Fragment.glsl @@ -67,7 +67,7 @@ void main() { float sign_c = sign(-1.0); vec4 sign_d = sign(vec4(-1.0)); int const_dot = ( + ivec2(0).x * ivec2(0).x + ivec2(0).y * ivec2(0).y); - uint first_leading_bit_abs = uint(findMSB(uint(abs(int(0u))))); + uint first_leading_bit_abs = uint(findMSB(0u)); int flb_a = findMSB(-1); ivec2 flb_b = findMSB(ivec2(-1)); uvec2 flb_c = uvec2(findMSB(uvec2(1u))); @@ -85,8 +85,8 @@ void main() { ivec2 ctz_h = ivec2(min(uvec2(findLSB(ivec2(1))), uvec2(32u))); int clz_a = (-1 < 0 ? 
0 : 31 - findMSB(-1)); uint clz_b = uint(31 - findMSB(1u)); - ivec2 _e68 = ivec2(-1); - ivec2 clz_c = mix(ivec2(31) - findMSB(_e68), ivec2(0), lessThan(_e68, ivec2(0))); + ivec2 _e67 = ivec2(-1); + ivec2 clz_c = mix(ivec2(31) - findMSB(_e67), ivec2(0), lessThan(_e67, ivec2(0))); uvec2 clz_d = uvec2(ivec2(31) - findMSB(uvec2(1u))); float lde_a = ldexp(1.0, 2); vec2 lde_b = ldexp(vec2(1.0, 2.0), ivec2(3, 4)); diff --git a/naga/tests/out/hlsl/math-functions.hlsl b/naga/tests/out/hlsl/math-functions.hlsl index 53d3acf0c12..5da3461dae7 100644 --- a/naga/tests/out/hlsl/math-functions.hlsl +++ b/naga/tests/out/hlsl/math-functions.hlsl @@ -77,7 +77,7 @@ void main() float sign_c = sign(-1.0); float4 sign_d = sign((-1.0).xxxx); int const_dot = dot((int2)0, (int2)0); - uint first_leading_bit_abs = firstbithigh(abs(0u)); + uint first_leading_bit_abs = firstbithigh(0u); int flb_a = asint(firstbithigh(-1)); int2 flb_b = asint(firstbithigh((-1).xx)); uint2 flb_c = firstbithigh((1u).xx); @@ -95,8 +95,8 @@ void main() int2 ctz_h = asint(min((32u).xx, firstbitlow((1).xx))); int clz_a = (-1 < 0 ? 0 : 31 - asint(firstbithigh(-1))); uint clz_b = (31u - firstbithigh(1u)); - int2 _expr68 = (-1).xx; - int2 clz_c = (_expr68 < (0).xx ? (0).xx : (31).xx - asint(firstbithigh(_expr68))); + int2 _expr67 = (-1).xx; + int2 clz_c = (_expr67 < (0).xx ? 
(0).xx : (31).xx - asint(firstbithigh(_expr67))); uint2 clz_d = ((31u).xx - firstbithigh((1u).xx)); float lde_a = ldexp(1.0, 2); float2 lde_b = ldexp(float2(1.0, 2.0), int2(3, 4)); diff --git a/naga/tests/out/msl/math-functions.msl b/naga/tests/out/msl/math-functions.msl index d93e502dc66..45fbcd00a16 100644 --- a/naga/tests/out/msl/math-functions.msl +++ b/naga/tests/out/msl/math-functions.msl @@ -70,13 +70,12 @@ fragment void main_( float sign_c = metal::sign(-1.0); metal::float4 sign_d = metal::sign(metal::float4(-1.0)); int const_dot = ( + metal::int2 {}.x * metal::int2 {}.x + metal::int2 {}.y * metal::int2 {}.y); - uint _e23 = metal::abs(0u); - uint first_leading_bit_abs = metal::select(31 - metal::clz(_e23), uint(-1), _e23 == 0 || _e23 == -1); + uint first_leading_bit_abs = metal::select(31 - metal::clz(0u), uint(-1), 0u == 0 || 0u == -1); int flb_a = metal::select(31 - metal::clz(metal::select(-1, ~-1, -1 < 0)), int(-1), -1 == 0 || -1 == -1); - metal::int2 _e28 = metal::int2(-1); - metal::int2 flb_b = metal::select(31 - metal::clz(metal::select(_e28, ~_e28, _e28 < 0)), int2(-1), _e28 == 0 || _e28 == -1); - metal::uint2 _e31 = metal::uint2(1u); - metal::uint2 flb_c = metal::select(31 - metal::clz(_e31), uint2(-1), _e31 == 0 || _e31 == -1); + metal::int2 _e27 = metal::int2(-1); + metal::int2 flb_b = metal::select(31 - metal::clz(metal::select(_e27, ~_e27, _e27 < 0)), int2(-1), _e27 == 0 || _e27 == -1); + metal::uint2 _e30 = metal::uint2(1u); + metal::uint2 flb_c = metal::select(31 - metal::clz(_e30), uint2(-1), _e30 == 0 || _e30 == -1); int ftb_a = (((metal::ctz(-1) + 1) % 33) - 1); uint ftb_b = (((metal::ctz(1u) + 1) % 33) - 1); metal::int2 ftb_c = (((metal::ctz(metal::int2(-1)) + 1) % 33) - 1); diff --git a/naga/tests/out/spv/debug-symbol-terrain.spvasm b/naga/tests/out/spv/debug-symbol-terrain.spvasm index 623b8dc2c1b..fd8e7f5df3b 100644 --- a/naga/tests/out/spv/debug-symbol-terrain.spvasm +++ b/naga/tests/out/spv/debug-symbol-terrain.spvasm @@ -366,10 
+366,10 @@ OpName %91 "x12" OpName %94 "m" OpName %203 "p" OpName %204 "fbm" -OpName %209 "x" -OpName %211 "v" -OpName %213 "a" -OpName %214 "i" +OpName %212 "x" +OpName %214 "v" +OpName %216 "a" +OpName %217 "i" OpName %255 "p" OpName %256 "min_max_height" OpName %257 "terrain_point" @@ -575,9 +575,12 @@ OpDecorate %582 Location 0 %206 = OpConstant %5 0.01 %207 = OpConstant %5 100.0 %208 = OpConstantComposite %6 %207 %207 -%210 = OpConstantNull %6 -%212 = OpTypePointer Function %5 -%215 = OpTypePointer Function %8 +%209 = OpConstant %5 0.87758255 +%210 = OpConstant %5 0.47942555 +%211 = OpConstantComposite %6 %209 %210 +%213 = OpConstantNull %6 +%215 = OpTypePointer Function %5 +%218 = OpTypePointer Function %8 %258 = OpTypeFunction %4 %6 %6 %271 = OpTypeFunction %14 %6 %6 %272 = OpConstant %5 0.1 @@ -835,28 +838,24 @@ OpFunctionEnd %204 = OpFunction %5 None %68 %203 = OpFunctionParameter %6 %202 = OpLabel -%211 = OpVariable %212 Function %74 -%214 = OpVariable %215 Function %135 -%209 = OpVariable %87 Function %210 -%213 = OpVariable %212 Function %78 -OpBranch %216 -%216 = OpLabel +%214 = OpVariable %215 Function %74 +%217 = OpVariable %218 Function %135 +%212 = OpVariable %87 Function %213 +%216 = OpVariable %215 Function %78 +OpBranch %219 +%219 = OpLabel OpLine %3 36 13 -%217 = OpVectorTimesScalar %6 %203 %206 +%220 = OpVectorTimesScalar %6 %203 %206 OpLine %3 36 5 -OpStore %209 %217 +OpStore %212 %220 OpLine %3 39 17 -OpLine %3 40 24 -%218 = OpExtInst %5 %1 Cos %78 OpLine %3 40 14 -%219 = OpExtInst %5 %1 Sin %78 -%220 = OpCompositeConstruct %6 %218 %219 OpLine %3 41 15 -%221 = OpCompositeExtract %5 %220 0 -%222 = OpCompositeExtract %5 %220 1 -%223 = OpCompositeExtract %5 %220 1 +%221 = OpCompositeExtract %5 %211 0 +%222 = OpCompositeExtract %5 %211 1 +%223 = OpCompositeExtract %5 %211 1 %224 = OpFNegate %5 %223 -%225 = OpCompositeExtract %5 %220 0 +%225 = OpCompositeExtract %5 %211 0 %226 = OpCompositeConstruct %6 %221 %222 %227 = OpCompositeConstruct %6 
%224 %225 %228 = OpCompositeConstruct %9 %226 %227 @@ -867,7 +866,7 @@ OpLoopMerge %230 %232 None OpBranch %231 %231 = OpLabel OpLine %3 43 22 -%233 = OpLoad %8 %214 +%233 = OpLoad %8 %217 %234 = OpULessThan %112 %233 %205 OpLine %3 43 21 OpSelectionMerge %235 None @@ -878,44 +877,44 @@ OpBranch %230 OpBranch %237 %237 = OpLabel OpLine %3 1 1 -%239 = OpLoad %5 %211 -%240 = OpLoad %5 %213 -%241 = OpLoad %6 %209 +%239 = OpLoad %5 %214 +%240 = OpLoad %5 %216 +%241 = OpLoad %6 %212 OpLine %3 44 21 %242 = OpFunctionCall %5 %67 %241 OpLine %3 44 13 %243 = OpFMul %5 %240 %242 %244 = OpFAdd %5 %239 %243 OpLine %3 44 9 -OpStore %211 %244 +OpStore %214 %244 OpLine %3 45 13 -%245 = OpLoad %6 %209 +%245 = OpLoad %6 %212 %246 = OpMatrixTimesVector %6 %228 %245 OpLine %3 45 13 %247 = OpVectorTimesScalar %6 %246 %81 %248 = OpFAdd %6 %247 %208 OpLine %3 45 9 -OpStore %209 %248 +OpStore %212 %248 OpLine %3 1 1 -%249 = OpLoad %5 %213 +%249 = OpLoad %5 %216 OpLine %3 46 13 %250 = OpFMul %5 %249 %78 OpLine %3 46 9 -OpStore %213 %250 +OpStore %216 %250 OpBranch %238 %238 = OpLabel OpBranch %232 %232 = OpLabel OpLine %3 1 1 -%251 = OpLoad %8 %214 +%251 = OpLoad %8 %217 OpLine %3 43 43 %252 = OpIAdd %8 %251 %126 OpLine %3 43 39 -OpStore %214 %252 +OpStore %217 %252 OpBranch %229 %230 = OpLabel OpLine %3 1 1 -%253 = OpLoad %5 %211 +%253 = OpLoad %5 %214 OpReturnValue %253 OpFunctionEnd %257 = OpFunction %4 None %258 @@ -1193,8 +1192,8 @@ OpReturn OpFunctionEnd %465 = OpFunction %2 None %346 %453 = OpLabel -%468 = OpVariable %212 Function %74 -%469 = OpVariable %215 Function %135 +%468 = OpVariable %215 Function %74 +%469 = OpVariable %218 Function %135 %456 = OpLoad %8 %455 %459 = OpLoad %7 %457 %462 = OpLoad %6 %460 diff --git a/naga/tests/out/spv/math-functions.spvasm b/naga/tests/out/spv/math-functions.spvasm index ba3e7cffb9a..bbbf3709700 100644 --- a/naga/tests/out/spv/math-functions.spvasm +++ b/naga/tests/out/spv/math-functions.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; 
Generator: rspirv -; Bound: 126 +; Bound: 125 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 @@ -61,10 +61,10 @@ OpMemberDecorate %13 1 Offset 16 %45 = OpConstantComposite %3 %43 %43 %43 %43 %52 = OpConstantComposite %3 %17 %17 %17 %17 %59 = OpConstantNull %6 -%77 = OpConstant %25 32 -%86 = OpConstantComposite %29 %77 %77 -%95 = OpConstant %6 31 -%100 = OpConstantComposite %5 %95 %95 +%76 = OpConstant %25 32 +%85 = OpConstantComposite %29 %76 %76 +%94 = OpConstant %6 31 +%99 = OpConstantComposite %5 %94 %94 %15 = OpFunction %2 None %16 %14 = OpLabel OpBranch %46 @@ -87,60 +87,59 @@ OpBranch %46 %65 = OpCompositeExtract %6 %24 1 %66 = OpIMul %6 %64 %65 %58 = OpIAdd %6 %63 %66 -%67 = OpCopyObject %25 %26 -%68 = OpExtInst %25 %1 FindUMsb %67 -%69 = OpExtInst %6 %1 FindSMsb %20 -%70 = OpExtInst %5 %1 FindSMsb %27 -%71 = OpExtInst %29 %1 FindUMsb %30 -%72 = OpExtInst %6 %1 FindILsb %20 -%73 = OpExtInst %25 %1 FindILsb %28 -%74 = OpExtInst %5 %1 FindILsb %27 -%75 = OpExtInst %29 %1 FindILsb %30 -%78 = OpExtInst %25 %1 FindILsb %26 -%76 = OpExtInst %25 %1 UMin %77 %78 -%80 = OpExtInst %6 %1 FindILsb %31 -%79 = OpExtInst %6 %1 UMin %77 %80 -%82 = OpExtInst %25 %1 FindILsb %32 -%81 = OpExtInst %25 %1 UMin %77 %82 -%84 = OpExtInst %6 %1 FindILsb %20 -%83 = OpExtInst %6 %1 UMin %77 %84 -%87 = OpExtInst %29 %1 FindILsb %33 -%85 = OpExtInst %29 %1 UMin %86 %87 -%89 = OpExtInst %5 %1 FindILsb %34 -%88 = OpExtInst %5 %1 UMin %86 %89 -%91 = OpExtInst %29 %1 FindILsb %30 -%90 = OpExtInst %29 %1 UMin %86 %91 -%93 = OpExtInst %5 %1 FindILsb %36 -%92 = OpExtInst %5 %1 UMin %86 %93 -%96 = OpExtInst %6 %1 FindUMsb %20 -%94 = OpISub %6 %95 %96 -%98 = OpExtInst %6 %1 FindUMsb %28 -%97 = OpISub %25 %95 %98 -%101 = OpExtInst %5 %1 FindUMsb %27 -%99 = OpISub %5 %100 %101 -%103 = OpExtInst %5 %1 FindUMsb %30 -%102 = OpISub %29 %100 %103 -%104 = OpExtInst %4 %1 Ldexp %17 %37 -%105 = OpExtInst %7 %1 Ldexp %39 %42 +%67 = OpExtInst %25 %1 FindUMsb %26 +%68 = 
OpExtInst %6 %1 FindSMsb %20 +%69 = OpExtInst %5 %1 FindSMsb %27 +%70 = OpExtInst %29 %1 FindUMsb %30 +%71 = OpExtInst %6 %1 FindILsb %20 +%72 = OpExtInst %25 %1 FindILsb %28 +%73 = OpExtInst %5 %1 FindILsb %27 +%74 = OpExtInst %29 %1 FindILsb %30 +%77 = OpExtInst %25 %1 FindILsb %26 +%75 = OpExtInst %25 %1 UMin %76 %77 +%79 = OpExtInst %6 %1 FindILsb %31 +%78 = OpExtInst %6 %1 UMin %76 %79 +%81 = OpExtInst %25 %1 FindILsb %32 +%80 = OpExtInst %25 %1 UMin %76 %81 +%83 = OpExtInst %6 %1 FindILsb %20 +%82 = OpExtInst %6 %1 UMin %76 %83 +%86 = OpExtInst %29 %1 FindILsb %33 +%84 = OpExtInst %29 %1 UMin %85 %86 +%88 = OpExtInst %5 %1 FindILsb %34 +%87 = OpExtInst %5 %1 UMin %85 %88 +%90 = OpExtInst %29 %1 FindILsb %30 +%89 = OpExtInst %29 %1 UMin %85 %90 +%92 = OpExtInst %5 %1 FindILsb %36 +%91 = OpExtInst %5 %1 UMin %85 %92 +%95 = OpExtInst %6 %1 FindUMsb %20 +%93 = OpISub %6 %94 %95 +%97 = OpExtInst %6 %1 FindUMsb %28 +%96 = OpISub %25 %94 %97 +%100 = OpExtInst %5 %1 FindUMsb %27 +%98 = OpISub %5 %99 %100 +%102 = OpExtInst %5 %1 FindUMsb %30 +%101 = OpISub %29 %99 %102 +%103 = OpExtInst %4 %1 Ldexp %17 %37 +%104 = OpExtInst %7 %1 Ldexp %39 %42 +%105 = OpExtInst %8 %1 ModfStruct %43 %106 = OpExtInst %8 %1 ModfStruct %43 -%107 = OpExtInst %8 %1 ModfStruct %43 -%108 = OpCompositeExtract %4 %107 0 -%109 = OpExtInst %8 %1 ModfStruct %43 -%110 = OpCompositeExtract %4 %109 1 -%111 = OpExtInst %9 %1 ModfStruct %44 -%112 = OpExtInst %10 %1 ModfStruct %45 -%113 = OpCompositeExtract %3 %112 1 -%114 = OpCompositeExtract %4 %113 0 -%115 = OpExtInst %9 %1 ModfStruct %44 -%116 = OpCompositeExtract %7 %115 0 -%117 = OpCompositeExtract %4 %116 1 +%107 = OpCompositeExtract %4 %106 0 +%108 = OpExtInst %8 %1 ModfStruct %43 +%109 = OpCompositeExtract %4 %108 1 +%110 = OpExtInst %9 %1 ModfStruct %44 +%111 = OpExtInst %10 %1 ModfStruct %45 +%112 = OpCompositeExtract %3 %111 1 +%113 = OpCompositeExtract %4 %112 0 +%114 = OpExtInst %9 %1 ModfStruct %44 +%115 = OpCompositeExtract %7 %114 0 
+%116 = OpCompositeExtract %4 %115 1 +%117 = OpExtInst %11 %1 FrexpStruct %43 %118 = OpExtInst %11 %1 FrexpStruct %43 -%119 = OpExtInst %11 %1 FrexpStruct %43 -%120 = OpCompositeExtract %4 %119 0 -%121 = OpExtInst %11 %1 FrexpStruct %43 -%122 = OpCompositeExtract %6 %121 1 -%123 = OpExtInst %13 %1 FrexpStruct %45 -%124 = OpCompositeExtract %12 %123 1 -%125 = OpCompositeExtract %6 %124 0 +%119 = OpCompositeExtract %4 %118 0 +%120 = OpExtInst %11 %1 FrexpStruct %43 +%121 = OpCompositeExtract %6 %120 1 +%122 = OpExtInst %13 %1 FrexpStruct %45 +%123 = OpCompositeExtract %12 %122 1 +%124 = OpCompositeExtract %6 %123 0 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/math-functions.wgsl b/naga/tests/out/wgsl/math-functions.wgsl index 92f391038db..ce38fee9869 100644 --- a/naga/tests/out/wgsl/math-functions.wgsl +++ b/naga/tests/out/wgsl/math-functions.wgsl @@ -12,7 +12,7 @@ fn main() { let sign_c = sign(-1f); let sign_d = sign(vec4(-1f)); let const_dot = dot(vec2(), vec2()); - let first_leading_bit_abs = firstLeadingBit(abs(0u)); + let first_leading_bit_abs = firstLeadingBit(0u); let flb_a = firstLeadingBit(-1i); let flb_b = firstLeadingBit(vec2(-1i)); let flb_c = firstLeadingBit(vec2(1u)); diff --git a/tests/src/expectations.rs b/tests/src/expectations.rs index ee48e83aa8f..4049f910b6b 100644 --- a/tests/src/expectations.rs +++ b/tests/src/expectations.rs @@ -1,3 +1,5 @@ +use core::fmt; + /// Conditions under which a test should fail or be skipped. /// /// By passing a `FailureCase` to [`TestParameters::expect_fail`][expect_fail], you can @@ -15,7 +17,7 @@ /// vendor: None, /// adapter: Some("RTX"), /// driver: None, -/// reasons: vec![FailureReason::ValidationError(Some("Some error substring"))], +/// reasons: vec![FailureReason::validation_error().with_message("Some error substring")], /// behavior: FailureBehavior::AssertFailure, /// } /// # ; @@ -158,7 +160,7 @@ impl FailureCase { /// Return the reasons why this case should fail. 
pub fn reasons(&self) -> &[FailureReason] { if self.reasons.is_empty() { - std::array::from_ref(&FailureReason::Any) + std::array::from_ref(&FailureReason::ANY) } else { &self.reasons } @@ -170,7 +172,8 @@ impl FailureCase { /// /// If multiple reasons are pushed, will match any of them. pub fn validation_error(mut self, msg: &'static str) -> Self { - self.reasons.push(FailureReason::ValidationError(Some(msg))); + self.reasons + .push(FailureReason::validation_error().with_message(msg)); self } @@ -180,7 +183,7 @@ impl FailureCase { /// /// If multiple reasons are pushed, will match any of them. pub fn panic(mut self, msg: &'static str) -> Self { - self.reasons.push(FailureReason::Panic(Some(msg))); + self.reasons.push(FailureReason::panic().with_message(msg)); self } @@ -247,43 +250,16 @@ impl FailureCase { /// Returns true if the given failure "satisfies" this failure case. pub(crate) fn matches_failure(&self, failure: &FailureResult) -> bool { for reason in self.reasons() { - let result = match (reason, failure) { - (FailureReason::Any, _) => { - log::error!("Matched failure case: Wildcard"); - true - } - (FailureReason::ValidationError(None), FailureResult::ValidationError(_)) => { - log::error!("Matched failure case: Any Validation Error"); - true - } - ( - FailureReason::ValidationError(Some(expected)), - FailureResult::ValidationError(Some(actual)), - ) => { - let result = actual.to_lowercase().contains(&expected.to_lowercase()); - if result { - log::error!( - "Matched failure case: Validation Error containing \"{}\"", - expected - ); - } - result - } - (FailureReason::Panic(None), FailureResult::Panic(_)) => { - log::error!("Matched failure case: Any Panic"); - true - } - (FailureReason::Panic(Some(expected)), FailureResult::Panic(Some(actual))) => { - let result = actual.to_lowercase().contains(&expected.to_lowercase()); - if result { - log::error!("Matched failure case: Panic containing \"{}\"", expected); - } - result - } - _ => false, - }; - - if result 
{ + let kind_matched = reason.kind.map_or(true, |kind| kind == failure.kind); + + let message_matched = + reason + .message + .map_or(true, |message| matches!(&failure.message, Some(actual) if actual.to_lowercase().contains(&message.to_lowercase()))); + + if kind_matched && message_matched { + let message = failure.message.as_deref().unwrap_or("*no message*"); + log::error!("Matched {} {message}", failure.kind); return true; } } @@ -308,18 +284,54 @@ bitflags::bitflags! { /// /// If the test fails for a different reason, the given FailureCase will be ignored. #[derive(Default, Debug, Clone, PartialEq)] -pub enum FailureReason { - /// Matches any failure. - #[default] - Any, - /// Matches validation errors raised from the backend validation. +pub struct FailureReason { + /// Match a particular kind of failure result. /// - /// If a string is provided, matches only validation errors that contain the string. - ValidationError(Option<&'static str>), - /// A panic was raised. + /// If `None`, match any result kind. + kind: Option, + /// Match a particular message of a failure result. /// - /// If a string is provided, matches only panics that contain the string. - Panic(Option<&'static str>), + /// If `None`, matches any message. If `Some`, a case-insensitive sub-string + /// test is performed. Allowing `"error occured"` to match a message like + /// `"An unexpected Error occured!"`. + message: Option<&'static str>, +} + +impl FailureReason { + /// Match any failure reason. + const ANY: Self = Self { + kind: None, + message: None, + }; + + /// Match a validation error. + #[allow(dead_code)] // Not constructed on wasm + pub fn validation_error() -> Self { + Self { + kind: Some(FailureResultKind::ValidationError), + message: None, + } + } + + /// Match a panic. + pub fn panic() -> Self { + Self { + kind: Some(FailureResultKind::Panic), + message: None, + } + } + + /// Match an error with a message. + /// + /// If specified, a case-insensitive sub-string test is performed. 
Allowing + /// `"error occured"` to match a message like `"An unexpected Error + /// occured!"`. + pub fn with_message(self, message: &'static str) -> Self { + Self { + message: Some(message), + ..self + } + } } #[derive(Default, Clone)] @@ -336,11 +348,53 @@ pub enum FailureBehavior { Ignore, } +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) enum FailureResultKind { + #[allow(dead_code)] // Not constructed on wasm + ValidationError, + Panic, +} + +impl fmt::Display for FailureResultKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FailureResultKind::ValidationError => write!(f, "Validation Error"), + FailureResultKind::Panic => write!(f, "Panic"), + } + } +} + #[derive(Debug)] -pub(crate) enum FailureResult { +pub(crate) struct FailureResult { + kind: FailureResultKind, + message: Option, +} + +impl FailureResult { + /// Failure result is a panic. + pub(super) fn panic() -> Self { + Self { + kind: FailureResultKind::Panic, + message: None, + } + } + + /// Failure result is a validation error. #[allow(dead_code)] // Not constructed on wasm - ValidationError(Option), - Panic(Option), + pub(super) fn validation_error() -> Self { + Self { + kind: FailureResultKind::ValidationError, + message: None, + } + } + + /// Message associated with a failure result. 
+ pub(super) fn with_message(self, message: impl fmt::Display) -> Self { + Self { + kind: self.kind, + message: Some(message.to_string()), + } + } } #[derive(PartialEq, Clone, Copy, Debug)] @@ -393,7 +447,8 @@ pub(crate) fn expectations_match_failures( if !actual.is_empty() { result = ExpectationMatchResult::Panic; for failure in actual { - log::error!("Unexpected failure due to: {:?}", failure); + let message = failure.message.as_deref().unwrap_or("*no message*"); + log::error!("{}: {message}", failure.kind); } } @@ -409,11 +464,11 @@ mod test { }; fn validation_err(msg: &'static str) -> FailureResult { - FailureResult::ValidationError(Some(String::from(msg))) + FailureResult::validation_error().with_message(msg) } fn panic(msg: &'static str) -> FailureResult { - FailureResult::Panic(Some(String::from(msg))) + FailureResult::panic().with_message(msg) } #[test] @@ -423,7 +478,7 @@ mod test { // -- Unexpected failure -- let expectation = vec![]; - let actual = vec![FailureResult::ValidationError(None)]; + let actual = vec![FailureResult::validation_error()]; assert_eq!( super::expectations_match_failures(&expectation, actual), @@ -443,7 +498,7 @@ mod test { // -- Expected failure (validation) -- let expectation = vec![FailureCase::always()]; - let actual = vec![FailureResult::ValidationError(None)]; + let actual = vec![FailureResult::validation_error()]; assert_eq!( super::expectations_match_failures(&expectation, actual), @@ -453,7 +508,7 @@ mod test { // -- Expected failure (panic) -- let expectation = vec![FailureCase::always()]; - let actual = vec![FailureResult::Panic(None)]; + let actual = vec![FailureResult::panic()]; assert_eq!( super::expectations_match_failures(&expectation, actual), @@ -469,9 +524,9 @@ mod test { let expectation: Vec = vec![FailureCase::always().validation_error("Some StrIng")]; - let actual = vec![FailureResult::ValidationError(Some(String::from( + let actual = vec![FailureResult::validation_error().with_message( "a very long string that 
contains sOmE sTrInG of different capitalization", - )))]; + )]; assert_eq!( super::expectations_match_failures(&expectation, actual), diff --git a/tests/src/image.rs b/tests/src/image.rs index 08e30ae2efb..4c1f6b8b740 100644 --- a/tests/src/image.rs +++ b/tests/src/image.rs @@ -238,7 +238,7 @@ pub async fn compare_image_output( ) .await; write_png( - difference_path, + &difference_path, width, height, &magma_image_with_alpha, @@ -247,7 +247,7 @@ pub async fn compare_image_output( .await; if !all_passed { - panic!("Image data mismatch!") + panic!("Image data mismatch: {}", difference_path.display()) } } diff --git a/tests/src/run.rs b/tests/src/run.rs index e19615bdb25..f56651b5742 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -86,23 +86,29 @@ pub async fn execute_test( .await; if let Err(panic) = panic_res { - let panic_str = panic.downcast_ref::<&'static str>(); - let panic_string = if let Some(&panic_str) = panic_str { - Some(panic_str.to_string()) + let message = panic + .downcast_ref::<&str>() + .copied() + .or_else(|| panic.downcast_ref::().map(String::as_str)); + + let result = FailureResult::panic(); + + let result = if let Some(panic_str) = message { + result.with_message(panic_str) } else { - panic.downcast_ref::().cloned() + result }; - failures.push(FailureResult::Panic(panic_string)) + failures.push(result) } // Check whether any validation errors were reported during the test run. 
cfg_if::cfg_if!( if #[cfg(any(not(target_arch = "wasm32"), target_os = "emscripten"))] { - failures.extend(wgpu::hal::VALIDATION_CANARY.get_and_reset().into_iter().map(|msg| FailureResult::ValidationError(Some(msg)))); + failures.extend(wgpu::hal::VALIDATION_CANARY.get_and_reset().into_iter().map(|msg| FailureResult::validation_error().with_message(msg))); } else if #[cfg(all(target_arch = "wasm32", feature = "webgl"))] { if _surface_guard.unwrap().check_for_unreported_errors() { - failures.push(FailureResult::ValidationError(None)); + failures.push(FailureResult::validation_error()); } } else { } diff --git a/tests/tests/root.rs b/tests/tests/root.rs index e3f116b0c70..f886c0f9eb6 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -34,6 +34,7 @@ mod shader; mod shader_primitive_index; mod shader_view_format; mod texture_bounds; +mod texture_view_creation; mod transfer; mod vertex_indices; mod write_texture; diff --git a/tests/tests/texture_view_creation.rs b/tests/tests/texture_view_creation.rs new file mode 100644 index 00000000000..eeede4c26fc --- /dev/null +++ b/tests/tests/texture_view_creation.rs @@ -0,0 +1,65 @@ +use wgpu::*; +use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters}; + +#[gpu_test] +static STENCIL_ONLY_VIEW_CREATION: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .skip(FailureCase::webgl2()) // WebGL doesn't have stencil only views + .limits(wgpu::Limits::downlevel_defaults()), + ) + .run_async(|ctx| async move { + for format in [TextureFormat::Stencil8, TextureFormat::Depth24PlusStencil8] { + let texture = ctx.device.create_texture(&TextureDescriptor { + label: None, + size: Extent3d { + width: 256, + height: 256, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format, + usage: TextureUsages::COPY_DST + | TextureUsages::COPY_SRC + | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + let 
_view = texture.create_view(&TextureViewDescriptor { + aspect: TextureAspect::StencilOnly, + ..Default::default() + }); + } + }); + +#[gpu_test] +static DEPTH_ONLY_VIEW_CREATION: GpuTestConfiguration = + GpuTestConfiguration::new().run_async(|ctx| async move { + for format in [ + TextureFormat::Depth16Unorm, + TextureFormat::Depth24Plus, + TextureFormat::Depth24PlusStencil8, + ] { + let texture = ctx.device.create_texture(&TextureDescriptor { + label: None, + size: Extent3d { + width: 256, + height: 256, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format, + usage: TextureUsages::COPY_DST + | TextureUsages::COPY_SRC + | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + let _view = texture.create_view(&TextureViewDescriptor { + aspect: TextureAspect::DepthOnly, + ..Default::default() + }); + } + }); diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 5c2b4303389..28edac35b55 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-core" -version = "0.18.0" +version = "0.19.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU core logic on wgpu-hal" @@ -109,18 +109,18 @@ thiserror = "1" [dependencies.naga] path = "../naga" -version = "0.14.0" +version = "0.19.0" features = ["clone"] [dependencies.wgt] package = "wgpu-types" path = "../wgpu-types" -version = "0.18.0" +version = "0.19.0" [dependencies.hal] package = "wgpu-hal" path = "../wgpu-hal" -version = "0.18.0" +version = "0.19.0" default_features = false [target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index fa0c4d7dbee..f7cfc1da154 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1982,10 +1982,12 @@ impl Global { } } - let num_frames = present::DESIRED_NUM_FRAMES - .clamp(*caps.swap_chain_sizes.start(), 
*caps.swap_chain_sizes.end()); + let maximum_frame_latency = config.desired_maximum_frame_latency.clamp( + *caps.maximum_frame_latency.start(), + *caps.maximum_frame_latency.end(), + ); let mut hal_config = hal::SurfaceConfiguration { - swap_chain_size: num_frames, + maximum_frame_latency, present_mode: config.present_mode, composite_alpha_mode: config.alpha_mode, format: config.format, @@ -2056,7 +2058,6 @@ impl Global { *presentation = Some(present::Presentation { device: super::any_device::AnyDevice::new(device.clone()), config: config.clone(), - num_frames, acquired_texture: None, }); } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index be5c202ffbf..43a2d1d982d 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -24,6 +24,7 @@ use crate::{ use hal::{CommandEncoder as _, Device as _, Queue as _}; use parking_lot::Mutex; +use smallvec::SmallVec; use std::{ iter, mem, ptr, @@ -1115,10 +1116,13 @@ impl Global { .fetch_add(1, Ordering::Relaxed) + 1; let mut active_executions = Vec::new(); + let mut used_surface_textures = track::TextureUsageScope::new(); let snatch_guard = device.snatchable_lock.read(); + let mut submit_surface_textures_owned = SmallVec::<[_; 2]>::new(); + { let mut command_buffer_guard = hub.command_buffers.write(); @@ -1217,8 +1221,17 @@ impl Global { return Err(QueueSubmitError::DestroyedTexture(id)); } Some(TextureInner::Native { .. }) => false, - Some(TextureInner::Surface { ref has_work, .. }) => { + Some(TextureInner::Surface { + ref has_work, + ref raw, + .. + }) => { has_work.store(true, Ordering::Relaxed); + + if raw.is_some() { + submit_surface_textures_owned.push(texture.clone()); + } + true } }; @@ -1409,8 +1422,17 @@ impl Global { return Err(QueueSubmitError::DestroyedTexture(id)); } Some(TextureInner::Native { .. }) => {} - Some(TextureInner::Surface { ref has_work, .. }) => { + Some(TextureInner::Surface { + ref has_work, + ref raw, + .. 
+ }) => { has_work.store(true, Ordering::Relaxed); + + if raw.is_some() { + submit_surface_textures_owned.push(texture.clone()); + } + unsafe { used_surface_textures .merge_single(texture, None, hal::TextureUses::PRESENT) @@ -1449,12 +1471,23 @@ impl Global { .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()), ) .collect::>(); + + let mut submit_surface_textures = + SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len()); + + for texture in &submit_surface_textures_owned { + submit_surface_textures.extend(match texture.inner.get(&snatch_guard) { + Some(TextureInner::Surface { raw, .. }) => raw.as_ref(), + _ => None, + }); + } + unsafe { queue .raw .as_ref() .unwrap() - .submit(&refs, Some((fence, submit_index))) + .submit(&refs, &submit_surface_textures, Some((fence, submit_index))) .map_err(DeviceError::from)?; } diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 00dc049679a..d7b34497a2d 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -37,14 +37,11 @@ use thiserror::Error; use wgt::SurfaceStatus as Status; const FRAME_TIMEOUT_MS: u32 = 1000; -pub const DESIRED_NUM_FRAMES: u32 = 3; #[derive(Debug)] pub(crate) struct Presentation { pub(crate) device: AnyDevice, pub(crate) config: wgt::SurfaceConfiguration>, - #[allow(unused)] - pub(crate) num_frames: u32, pub(crate) acquired_texture: Option, } diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index b85a518dc4b..9dd24ee2b10 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-hal" -version = "0.18.0" +version = "0.19.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU hardware abstraction layer" @@ -85,12 +85,12 @@ rustc-hash = "1.1" log = "0.4" # backend: Gles -glow = { version = "0.13", git = "https://github.com/grovesNL/glow.git", rev = "29ff917a2b2ff7ce0a81b2cc5681de6d4735b36e", optional = true } +glow = { version = "0.13.1", optional = true } [dependencies.wgt] package = 
"wgpu-types" path = "../wgpu-types" -version = "0.18.0" +version = "0.19.0" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] # backend: Vulkan @@ -127,7 +127,7 @@ winapi = { version = "0.3", features = [ "winuser", "dcomp", ] } -d3d12 = { path = "../d3d12/", version = "0.7.0", optional = true, features = [ +d3d12 = { path = "../d3d12/", version = "0.19.0", optional = true, features = [ "libloading", ] } @@ -157,7 +157,7 @@ android_system_properties = "0.1.1" [dependencies.naga] path = "../naga" -version = "0.14.0" +version = "0.19.0" features = ["clone"] [build-dependencies] @@ -166,14 +166,14 @@ cfg_aliases.workspace = true # DEV dependencies [dev-dependencies.naga] path = "../naga" -version = "0.14.0" +version = "0.19.0" features = ["wgsl-in"] [dev-dependencies] cfg-if = "1" env_logger = "0.10" glam = "0.25.0" # for ray-traced-triangle example -winit = { version = "0.29.9", features = [ +winit = { version = "0.29.10", features = [ "android-native-activity", ] } # for "halmark" example diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 18f283d8e7f..c238f299e7d 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -23,7 +23,7 @@ const BUNNY_SIZE: f32 = 0.15 * 256.0; const GRAVITY: f32 = -9.8 * 100.0; const MAX_VELOCITY: f32 = 750.0; const COMMAND_BUFFER_PER_CONTEXT: usize = 100; -const DESIRED_FRAMES: u32 = 3; +const DESIRED_MAX_LATENCY: u32 = 2; #[repr(C)] #[derive(Clone, Copy)] @@ -132,9 +132,9 @@ impl Example { let window_size: (u32, u32) = window.inner_size().into(); let surface_config = hal::SurfaceConfiguration { - swap_chain_size: DESIRED_FRAMES.clamp( - *surface_caps.swap_chain_sizes.start(), - *surface_caps.swap_chain_sizes.end(), + maximum_frame_latency: DESIRED_MAX_LATENCY.clamp( + *surface_caps.maximum_frame_latency.start(), + *surface_caps.maximum_frame_latency.end(), ), present_mode: wgt::PresentMode::Fifo, composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, @@ -490,7 
+490,7 @@ impl Example { let mut fence = device.create_fence().unwrap(); let init_cmd = cmd_encoder.end_encoding().unwrap(); queue - .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value))) .unwrap(); device.wait(&fence, init_fence_value, !0).unwrap(); device.destroy_buffer(staging_buffer); @@ -542,7 +542,7 @@ impl Example { { let ctx = &mut self.contexts[self.context_index]; self.queue - .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value))) .unwrap(); } @@ -729,7 +729,9 @@ impl Example { } else { None }; - self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + self.queue + .submit(&[&cmd_buf], &[&surface_tex], fence_param) + .unwrap(); self.queue.present(&self.surface, surface_tex).unwrap(); ctx.used_cmd_bufs.push(cmd_buf); ctx.used_views.push(surface_tex_view); diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index 81ab4171e3d..342100e1cb6 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -183,6 +183,6 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height encoder.begin_render_pass(&rp_desc); encoder.end_render_pass(); let cmd_buf = encoder.end_encoding().unwrap(); - od.queue.submit(&[&cmd_buf], None).unwrap(); + od.queue.submit(&[&cmd_buf], &[], None).unwrap(); } } diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 6454cb89988..c05feae820f 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -14,7 +14,7 @@ use std::{ use winit::window::WindowButtons; const COMMAND_BUFFER_PER_CONTEXT: usize = 100; -const DESIRED_FRAMES: u32 = 3; +const DESIRED_MAX_LATENCY: u32 = 2; /// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc) /// 
[VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html) @@ -264,9 +264,9 @@ impl Example { *surface_caps.formats.first().unwrap() }; let surface_config = hal::SurfaceConfiguration { - swap_chain_size: DESIRED_FRAMES - .max(*surface_caps.swap_chain_sizes.start()) - .min(*surface_caps.swap_chain_sizes.end()), + maximum_frame_latency: DESIRED_MAX_LATENCY + .max(*surface_caps.maximum_frame_latency.start()) + .min(*surface_caps.maximum_frame_latency.end()), present_mode: wgt::PresentMode::Fifo, composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, format: surface_format, @@ -755,7 +755,7 @@ impl Example { let mut fence = device.create_fence().unwrap(); let init_cmd = cmd_encoder.end_encoding().unwrap(); queue - .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value))) .unwrap(); device.wait(&fence, init_fence_value, !0).unwrap(); cmd_encoder.reset_all(iter::once(init_cmd)); @@ -960,7 +960,9 @@ impl Example { } else { None }; - self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + self.queue + .submit(&[&cmd_buf], &[&surface_tex], fence_param) + .unwrap(); self.queue.present(&self.surface, surface_tex).unwrap(); ctx.used_cmd_bufs.push(cmd_buf); ctx.used_views.push(surface_tex_view); @@ -999,7 +1001,7 @@ impl Example { { let ctx = &mut self.contexts[self.context_index]; self.queue - .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value))) .unwrap(); } diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 1db9b0877da..f6027014d25 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -626,8 +626,8 @@ impl crate::Adapter for super::Adapter { wgt::TextureFormat::Rgb10a2Unorm, wgt::TextureFormat::Rgba16Float, ], - // we currently use a flip effect which supports 2..=16 buffers - swap_chain_sizes: 2..=16, + // See 
https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency + maximum_frame_latency: 1..=16, current_extent, usage: crate::TextureUses::COLOR_TARGET | crate::TextureUses::COPY_SRC diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs index 8833c1adf40..2d4d2c16732 100644 --- a/wgpu-hal/src/dx12/descriptor.rs +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -1,3 +1,4 @@ +use super::null_comptr_check; use crate::auxil::dxgi::result::HResult as _; use bit_set::BitSet; use parking_lot::Mutex; @@ -53,6 +54,8 @@ impl GeneralHeap { .into_device_result("Descriptor heap creation")? }; + null_comptr_check(&raw)?; + Ok(Self { raw: raw.clone(), ty, @@ -130,6 +133,8 @@ impl FixedSizeHeap { ) .into_device_result("Descriptor heap creation")?; + null_comptr_check(&heap)?; + Ok(Self { handle_size: device.get_descriptor_increment_size(ty) as _, availability: !0, // all free! @@ -254,6 +259,8 @@ impl CpuHeap { .create_descriptor_heap(total, ty, d3d12::DescriptorHeapFlags::empty(), 0) .into_device_result("CPU descriptor heap creation")?; + null_comptr_check(&raw)?; + Ok(Self { inner: Mutex::new(CpuHeapInner { _raw: raw.clone(), diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 0c1977203f8..bb128b2a6d2 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,9 +1,11 @@ use crate::{ auxil::{self, dxgi::result::HResult as _}, dx12::shader_compilation, + DeviceError, }; +use d3d12::ComPtr; -use super::{conv, descriptor, view}; +use super::{conv, descriptor, null_comptr_check, view}; use parking_lot::Mutex; use std::{ ffi, mem, @@ -29,7 +31,7 @@ impl super::Device { private_caps: super::PrivateCapabilities, library: &Arc, dxc_container: Option>, - ) -> Result { + ) -> Result { let mem_allocator = if private_caps.suballocation_supported { super::suballocation::create_allocator_wrapper(&raw)? 
} else { @@ -48,6 +50,8 @@ impl super::Device { }; hr.into_device_result("Idle fence creation")?; + null_comptr_check(&idle_fence)?; + let mut zero_buffer = d3d12::Resource::null(); unsafe { let raw_desc = d3d12_ty::D3D12_RESOURCE_DESC { @@ -89,6 +93,8 @@ impl super::Device { ) .into_device_result("Zero buffer creation")?; + null_comptr_check(&zero_buffer)?; + // Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` // this resource is zeroed by default. }; @@ -142,7 +148,7 @@ impl super::Device { // A null pResource is used to initialize a null descriptor, // which guarantees D3D11-like null binding behavior (reading 0s, writes are discarded) raw.create_render_target_view( - d3d12::ComPtr::null(), + ComPtr::null(), &d3d12::RenderTargetViewDesc::texture_2d( winapi::shared::dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, 0, @@ -185,10 +191,10 @@ impl super::Device { // Blocks until the dedicated present queue is finished with all of its work. // // Once this method completes, the surface is able to be resized or deleted. - pub(super) unsafe fn wait_for_present_queue_idle(&self) -> Result<(), crate::DeviceError> { + pub(super) unsafe fn wait_for_present_queue_idle(&self) -> Result<(), DeviceError> { let cur_value = self.idler.fence.get_value(); if cur_value == !0 { - return Err(crate::DeviceError::Lost); + return Err(DeviceError::Lost); } let value = cur_value + 1; @@ -326,7 +332,7 @@ impl crate::Device for super::Device { unsafe fn create_buffer( &self, desc: &crate::BufferDescriptor, - ) -> Result { + ) -> Result { let mut resource = d3d12::Resource::null(); let mut size = desc.size; if desc.usage.contains(crate::BufferUses::UNIFORM) { @@ -381,11 +387,12 @@ impl crate::Device for super::Device { &self, buffer: &super::Buffer, range: crate::MemoryRange, - ) -> Result { + ) -> Result { let mut ptr = ptr::null_mut(); // TODO: 0 for subresource should be fine here until map and unmap buffer is subresource aware? 
let hr = unsafe { (*buffer.resource).Map(0, ptr::null(), &mut ptr) }; hr.into_device_result("Map buffer")?; + Ok(crate::BufferMapping { ptr: ptr::NonNull::new(unsafe { ptr.offset(range.start as isize).cast::() }) .unwrap(), @@ -395,7 +402,7 @@ impl crate::Device for super::Device { }) } - unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), DeviceError> { unsafe { (*buffer.resource).Unmap(0, ptr::null()) }; Ok(()) } @@ -406,7 +413,7 @@ impl crate::Device for super::Device { unsafe fn create_texture( &self, desc: &crate::TextureDescriptor, - ) -> Result { + ) -> Result { use super::suballocation::create_texture_resource; let mut resource = d3d12::Resource::null(); @@ -465,7 +472,7 @@ impl crate::Device for super::Device { &self, texture: &super::Texture, desc: &crate::TextureViewDescriptor, - ) -> Result { + ) -> Result { let view_desc = desc.to_internal(texture); Ok(super::TextureView { @@ -591,7 +598,7 @@ impl crate::Device for super::Device { unsafe fn create_sampler( &self, desc: &crate::SamplerDescriptor, - ) -> Result { + ) -> Result { let handle = self.sampler_pool.lock().alloc_handle()?; let reduction = match desc.compare { @@ -633,7 +640,7 @@ impl crate::Device for super::Device { unsafe fn create_command_encoder( &self, desc: &crate::CommandEncoderDescriptor, - ) -> Result { + ) -> Result { let allocator = self .raw .create_command_allocator(d3d12::CmdListType::Direct) @@ -665,7 +672,7 @@ impl crate::Device for super::Device { unsafe fn create_bind_group_layout( &self, desc: &crate::BindGroupLayoutDescriptor, - ) -> Result { + ) -> Result { let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); for entry in desc.entries.iter() { let count = entry.count.map_or(1, NonZeroU32::get); @@ -714,7 +721,7 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_layout( &self, desc: &crate::PipelineLayoutDescriptor, - ) -> 
Result { + ) -> Result { use naga::back::hlsl; // Pipeline layouts are implemented as RootSignature for D3D12. // @@ -1024,7 +1031,7 @@ impl crate::Device for super::Device { ) .map_err(|e| { log::error!("Unable to find serialization function: {:?}", e); - crate::DeviceError::Lost + DeviceError::Lost })? .into_device_result("Root signature serialization")?; @@ -1033,7 +1040,7 @@ impl crate::Device for super::Device { "Root signature serialization error: {:?}", unsafe { error.as_c_str() }.to_str().unwrap() ); - return Err(crate::DeviceError::Lost); + return Err(DeviceError::Lost); } let raw = self @@ -1076,7 +1083,7 @@ impl crate::Device for super::Device { unsafe fn create_bind_group( &self, desc: &crate::BindGroupDescriptor, - ) -> Result { + ) -> Result { let mut cpu_views = desc .layout .cpu_heap_views @@ -1437,6 +1444,8 @@ impl crate::Device for super::Device { hr.into_result() .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; + null_comptr_check(&raw)?; + if let Some(name) = desc.label { let cwstr = conv::map_label(name); unsafe { raw.SetName(cwstr.as_ptr()) }; @@ -1474,6 +1483,8 @@ impl crate::Device for super::Device { crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) })?; + null_comptr_check(&raw)?; + if let Some(name) = desc.label { let cwstr = conv::map_label(name); unsafe { raw.SetName(cwstr.as_ptr()) }; @@ -1489,7 +1500,7 @@ impl crate::Device for super::Device { unsafe fn create_query_set( &self, desc: &wgt::QuerySetDescriptor, - ) -> Result { + ) -> Result { let (heap_ty, raw_ty) = match desc.ty { wgt::QueryType::Occlusion => ( d3d12::QueryHeapType::Occlusion, @@ -1510,6 +1521,8 @@ impl crate::Device for super::Device { .create_query_heap(heap_ty, desc.count, 0) .into_device_result("Query heap creation")?; + null_comptr_check(&raw)?; + if let Some(label) = desc.label { let cwstr = conv::map_label(label); unsafe { raw.SetName(cwstr.as_ptr()) }; @@ -1519,7 +1532,7 @@ impl crate::Device for 
super::Device { } unsafe fn destroy_query_set(&self, _set: super::QuerySet) {} - unsafe fn create_fence(&self) -> Result { + unsafe fn create_fence(&self) -> Result { let mut raw = d3d12::Fence::null(); let hr = unsafe { self.raw.CreateFence( @@ -1530,13 +1543,15 @@ impl crate::Device for super::Device { ) }; hr.into_device_result("Fence creation")?; + null_comptr_check(&raw)?; + Ok(super::Fence { raw }) } unsafe fn destroy_fence(&self, _fence: super::Fence) {} unsafe fn get_fence_value( &self, fence: &super::Fence, - ) -> Result { + ) -> Result { Ok(unsafe { fence.raw.GetCompletedValue() }) } unsafe fn wait( @@ -1544,7 +1559,7 @@ impl crate::Device for super::Device { fence: &super::Fence, value: crate::FenceValue, timeout_ms: u32, - ) -> Result { + ) -> Result { let timeout_duration = Duration::from_millis(timeout_ms as u64); // We first check if the fence has already reached the value we're waiting for. @@ -1602,7 +1617,7 @@ impl crate::Device for super::Device { winbase::WAIT_OBJECT_0 => {} winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => { log::error!("Wait failed!"); - break Err(crate::DeviceError::Lost); + break Err(DeviceError::Lost); } winerror::WAIT_TIMEOUT => { log::trace!("Wait timed out!"); @@ -1610,7 +1625,7 @@ impl crate::Device for super::Device { } other => { log::error!("Unexpected wait status: 0x{:x}", other); - break Err(crate::DeviceError::Lost); + break Err(DeviceError::Lost); } }; @@ -1664,7 +1679,7 @@ impl crate::Device for super::Device { unsafe fn create_acceleration_structure( &self, _desc: &crate::AccelerationStructureDescriptor, - ) -> Result { + ) -> Result { // Create a D3D12 resource as per-usual. 
todo!() } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index e0cd1c15cf6..038bb8ca152 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -660,13 +660,22 @@ impl crate::Surface for Surface { let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format); + // The range for `SetMaximumFrameLatency` is 1-16 so the maximum latency requested should be 15 because we add 1. + // https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency + debug_assert!(config.maximum_frame_latency <= 15); + + // Nvidia recommends to use 1-2 more buffers than the maximum latency + // https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/ + // For high latency extra buffers seems excessive, so go with a minimum of 3 and beyond that add 1. + let swap_chain_buffer = (config.maximum_frame_latency + 1).min(16); + let swap_chain = match self.swap_chain.write().take() { //Note: this path doesn't properly re-initialize all of the things Some(sc) => { let raw = unsafe { sc.release_resources() }; let result = unsafe { raw.ResizeBuffers( - config.swap_chain_size, + swap_chain_buffer, config.extent.width, config.extent.height, non_srgb_format, @@ -693,7 +702,7 @@ impl crate::Surface for Surface { quality: 0, }, buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, - buffer_count: config.swap_chain_size, + buffer_count: swap_chain_buffer, scaling: d3d12::Scaling::Stretch, swap_effect: d3d12::SwapEffect::FlipDiscard, flags, @@ -797,11 +806,11 @@ impl crate::Surface for Surface { | SurfaceTarget::SwapChainPanel(_) => {} } - unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) }; + unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) }; let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() }; - let mut resources = Vec::with_capacity(config.swap_chain_size as usize); - for i in 0..config.swap_chain_size { + let mut resources = 
Vec::with_capacity(swap_chain_buffer as usize); + for i in 0..swap_chain_buffer { let mut resource = d3d12::Resource::null(); unsafe { swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void()) @@ -877,6 +886,7 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + _surface_textures: &[&Texture], signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { let mut temp_lists = self.temp_lists.lock(); @@ -933,3 +943,15 @@ impl crate::Queue for Queue { (1_000_000_000.0 / frequency as f64) as f32 } } + +/// A shorthand for producing a `ResourceCreationFailed` error if a ComPtr is null. +#[inline] +pub fn null_comptr_check( + ptr: &d3d12::ComPtr, +) -> Result<(), crate::DeviceError> { + if d3d12::ComPtr::is_null(ptr) { + return Err(crate::DeviceError::ResourceCreationFailed); + } + + Ok(()) +} diff --git a/wgpu-hal/src/dx12/suballocation.rs b/wgpu-hal/src/dx12/suballocation.rs index 3b9696e4555..47a398be53e 100644 --- a/wgpu-hal/src/dx12/suballocation.rs +++ b/wgpu-hal/src/dx12/suballocation.rs @@ -16,6 +16,7 @@ use placed as allocation; // This is the fast path using gpu_allocator to suballocate buffers and textures. #[cfg(feature = "windows_rs")] mod placed { + use crate::dx12::null_comptr_check; use d3d12::ComPtr; use parking_lot::Mutex; use std::ptr; @@ -115,6 +116,8 @@ mod placed { ) }; + null_comptr_check(resource)?; + Ok((hr, Some(AllocationWrapper { allocation }))) } @@ -162,6 +165,8 @@ mod placed { ) }; + null_comptr_check(resource)?; + Ok((hr, Some(AllocationWrapper { allocation }))) } @@ -223,6 +228,7 @@ mod placed { // This is the older, slower path where it doesn't suballocate buffers. 
// Tracking issue for when it can be removed: https://github.com/gfx-rs/wgpu/issues/3207 mod committed { + use crate::dx12::null_comptr_check; use d3d12::ComPtr; use parking_lot::Mutex; use std::ptr; @@ -296,6 +302,8 @@ mod committed { ) }; + null_comptr_check(resource)?; + Ok((hr, None)) } @@ -332,6 +340,8 @@ mod committed { ) }; + null_comptr_check(resource)?; + Ok((hr, None)) } diff --git a/wgpu-hal/src/dx12/view.rs b/wgpu-hal/src/dx12/view.rs index 6cbad7bd1d3..ae8e5814a8e 100644 --- a/wgpu-hal/src/dx12/view.rs +++ b/wgpu-hal/src/dx12/view.rs @@ -36,6 +36,7 @@ impl crate::TextureViewDescriptor<'_> { fn aspects_to_plane(aspects: crate::FormatAspects) -> u32 { match aspects { + crate::FormatAspects::STENCIL => 1, crate::FormatAspects::PLANE_1 => 1, crate::FormatAspects::PLANE_2 => 2, _ => 0, diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 12f86e6f31f..d58e779b96a 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -104,6 +104,7 @@ impl crate::Queue for Context { unsafe fn submit( &self, command_buffers: &[&Resource], + surface_textures: &[&Resource], signal_fence: Option<(&mut Resource, crate::FenceValue)>, ) -> DeviceResult<()> { Ok(()) diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 5f92d2c4ab6..afa40237979 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -1138,7 +1138,7 @@ impl crate::Adapter for super::Adapter { vec![wgt::PresentMode::Fifo] //TODO }, composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO - swap_chain_sizes: 2..=2, + maximum_frame_latency: 2..=2, //TODO, unused currently current_extent: None, usage: crate::TextureUses::COLOR_TARGET, }) diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs index ef0ff4b6f37..aa985d81210 100644 --- a/wgpu-hal/src/gles/egl.rs +++ b/wgpu-hal/src/gles/egl.rs @@ -1092,6 +1092,7 @@ impl Surface { .map_err(|e| { log::error!("swap_buffers failed: {}", e); crate::SurfaceError::Lost + // TODO: should we 
unset the current context here? })?; self.egl .instance diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 5a4deb8e1ac..6ec553bd296 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1748,6 +1748,7 @@ impl crate::Queue for super::Queue { unsafe fn submit( &self, command_buffers: &[&super::CommandBuffer], + _surface_textures: &[&super::Texture], signal_fence: Option<(&mut super::Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { let shared = Arc::clone(&self.shared); diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs index a09a50330d1..dbe8218501e 100644 --- a/wgpu-hal/src/gles/wgl.rs +++ b/wgpu-hal/src/gles/wgl.rs @@ -77,6 +77,24 @@ impl AdapterContext { AdapterContextLock { inner } } + + /// Obtain a lock to the WGL context and get handle to the [`glow::Context`] that can be used to + /// do rendering. + /// + /// Unlike [`lock`](Self::lock), this accepts a device to pass to `make_current` and exposes the error + /// when `make_current` fails. + #[track_caller] + fn lock_with_dc(&self, device: HDC) -> Result, Error> { + let inner = self + .inner + .try_lock_for(Duration::from_secs(CONTEXT_LOCK_TIMEOUT_SECS)) + .expect("Could not lock adapter context. 
This is most-likely a deadlock."); + + inner + .context + .make_current(device) + .map(|()| AdapterContextLock { inner }) + } } /// A guard containing a lock to an [`AdapterContext`] @@ -603,16 +621,10 @@ impl Surface { window: self.window, }; - let inner = context.inner.lock(); - - if let Err(e) = inner.context.make_current(dc.device) { + let gl = context.lock_with_dc(dc.device).map_err(|e| { log::error!("unable to make the OpenGL context current for surface: {e}",); - return Err(crate::SurfaceError::Other( - "unable to make the OpenGL context current for surface", - )); - } - - let gl = &inner.gl; + crate::SurfaceError::Other("unable to make the OpenGL context current for surface") + })?; unsafe { gl.bind_framebuffer(glow::DRAW_FRAMEBUFFER, None) }; unsafe { gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(sc.framebuffer)) }; @@ -693,16 +705,11 @@ impl crate::Surface for Surface { } let format_desc = device.shared.describe_texture_format(config.format); - let inner = &device.shared.context.inner.lock(); - - if let Err(e) = inner.context.make_current(dc.device) { + let gl = &device.shared.context.lock_with_dc(dc.device).map_err(|e| { log::error!("unable to make the OpenGL context current for surface: {e}",); - return Err(crate::SurfaceError::Other( - "unable to make the OpenGL context current for surface", - )); - } + crate::SurfaceError::Other("unable to make the OpenGL context current for surface") + })?; - let gl = &inner.gl; let renderbuffer = unsafe { gl.create_renderbuffer() }.map_err(|error| { log::error!("Internal swapchain renderbuffer creation failed: {error}"); crate::DeviceError::OutOfMemory diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 7bd6eb77bb7..561024dd98d 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -413,9 +413,12 @@ pub trait Queue: WasmNotSendSync { /// - all of the command buffers were created from command pools /// that are associated with this queue. 
/// - all of the command buffers had `CommadBuffer::finish()` called. + /// - all surface textures that the command buffers write to must be + /// passed to the surface_textures argument. unsafe fn submit( &self, command_buffers: &[&A::CommandBuffer], + surface_textures: &[&A::SurfaceTexture], signal_fence: Option<(&mut A::Fence, FenceValue)>, ) -> Result<(), DeviceError>; unsafe fn present( @@ -922,11 +925,14 @@ pub struct SurfaceCapabilities { /// Must be at least one. pub formats: Vec, - /// Range for the swap chain sizes. + /// Range for the number of queued frames. /// - /// - `swap_chain_sizes.start` must be at least 1. - /// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`. - pub swap_chain_sizes: RangeInclusive, + /// This adjusts either the swapchain frame count to value + 1 - or sets SetMaximumFrameLatency to the value given, + /// or uses a wait-for-present in the acquire method to limit rendering such that it acts like it's a value + 1 swapchain frame set. + /// + /// - `maximum_frame_latency.start` must be at least 1. + /// - `maximum_frame_latency.end` must be larger or equal to `maximum_frame_latency.start`. + pub maximum_frame_latency: RangeInclusive, /// Current extent of the surface, if known. pub current_extent: Option, @@ -1252,9 +1258,9 @@ pub struct RenderPipelineDescriptor<'a, A: Api> { #[derive(Debug, Clone)] pub struct SurfaceConfiguration { - /// Number of textures in the swap chain. Must be in - /// `SurfaceCapabilities::swap_chain_size` range. - pub swap_chain_size: u32, + /// Maximum number of queued frames. Must be in + /// `SurfaceCapabilities::maximum_frame_latency` range. + pub maximum_frame_latency: u32, /// Vertical synchronization mode. pub present_mode: wgt::PresentMode, /// Alpha composition mode. 
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 3d8f6f3e575..a946ce58196 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -320,13 +320,14 @@ impl crate::Adapter for super::Adapter { let pc = &self.shared.private_caps; Some(crate::SurfaceCapabilities { formats, - //Note: this is hardcoded in `CAMetalLayer` documentation - swap_chain_sizes: if pc.can_set_maximum_drawables_count { - 2..=3 + // We use this here to govern the maximum number of drawables + 1. + // See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount + maximum_frame_latency: if pc.can_set_maximum_drawables_count { + 1..=2 } else { - // 3 is the default in `CAMetalLayer` documentation + // 3 is the default value for maximum drawables in `CAMetalLayer` documentation // iOS 10.3 was tested to use 3 on iphone5s - 3..=3 + 2..=2 }, present_modes: if pc.can_set_display_sync { vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate] diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 39589115e73..298f60faac9 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -368,6 +368,7 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + _surface_textures: &[&SurfaceTexture], signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { objc::rc::autoreleasepool(|| { diff --git a/wgpu-hal/src/metal/surface.rs b/wgpu-hal/src/metal/surface.rs index e54a176da59..a97eff0aaed 100644 --- a/wgpu-hal/src/metal/surface.rs +++ b/wgpu-hal/src/metal/surface.rs @@ -221,7 +221,7 @@ impl crate::Surface for super::Surface { } // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3) - render_layer.set_maximum_drawable_count(config.swap_chain_size as _); + render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1); render_layer.set_drawable_size(drawable_size); if 
caps.can_set_next_drawable_timeout { let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false]; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 477d166c702..85e620d23c1 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1848,7 +1848,11 @@ impl crate::Adapter for super::Adapter { .collect(); Some(crate::SurfaceCapabilities { formats, - swap_chain_sizes: caps.min_image_count..=max_image_count, + // TODO: Right now we're always trunkating the swap chain + // (presumably - we're actually setting the min image count which isn't necessarily the swap chain size) + // Instead, we should use extensions when available to wait in present. + // See https://github.com/gfx-rs/wgpu/issues/2869 + maximum_frame_latency: (caps.min_image_count - 1)..=(max_image_count - 1), // Note this can't underflow since both `min_image_count` is at least one and we already patched `max_image_count`. current_extent, usage: conv::map_vk_image_usage(caps.supported_usage_flags), present_modes: raw_present_modes diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index a37017a9e67..fdfb6ee9ed0 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -579,7 +579,7 @@ impl super::Device { let mut info = vk::SwapchainCreateInfoKHR::builder() .flags(raw_flags) .surface(surface.raw) - .min_image_count(config.swap_chain_size) + .min_image_count(config.maximum_frame_latency + 1) // TODO: https://github.com/gfx-rs/wgpu/issues/2869 .image_format(original_format) .image_color_space(color_space) .image_extent(vk::Extent2D { @@ -627,8 +627,16 @@ impl super::Device { let images = unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?; - let vk_info = vk::FenceCreateInfo::builder().build(); - let fence = unsafe { self.shared.raw.create_fence(&vk_info, None) } + // NOTE: It's important that we define at least images.len() + 1 wait + // semaphores, since we 
prospectively need to provide the call to + // acquire the next image with an unsignaled semaphore. + let surface_semaphores = (0..images.len() + 1) + .map(|_| unsafe { + self.shared + .raw + .create_semaphore(&vk::SemaphoreCreateInfo::builder(), None) + }) + .collect::, _>>() .map_err(crate::DeviceError::from)?; Ok(super::Swapchain { @@ -636,10 +644,11 @@ impl super::Device { raw_flags, functor, device: Arc::clone(&self.shared), - fence, images, config: config.clone(), view_formats: wgt_view_formats, + surface_semaphores, + next_surface_index: 0, }) } diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index 179842c1e5c..1f0159413f1 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -169,7 +169,7 @@ impl super::Swapchain { /// # Safety /// /// - The device must have been made idle before calling this function. - unsafe fn release_resources(self, device: &ash::Device) -> Self { + unsafe fn release_resources(mut self, device: &ash::Device) -> Self { profiling::scope!("Swapchain::release_resources"); { profiling::scope!("vkDeviceWaitIdle"); @@ -177,7 +177,13 @@ impl super::Swapchain { // the presentation work is done, we are forced to wait until the device is idle. let _ = unsafe { device.device_wait_idle() }; }; - unsafe { device.destroy_fence(self.fence, None) }; + + for semaphore in self.surface_semaphores.drain(..) { + unsafe { + device.destroy_semaphore(semaphore, None); + } + } + self } } @@ -934,10 +940,12 @@ impl crate::Surface for super::Surface { timeout_ns = u64::MAX; } + let wait_semaphore = sc.surface_semaphores[sc.next_surface_index]; + // will block if no image is available let (index, suboptimal) = match unsafe { sc.functor - .acquire_next_image(sc.raw, timeout_ns, vk::Semaphore::null(), sc.fence) + .acquire_next_image(sc.raw, timeout_ns, wait_semaphore, vk::Fence::null()) } { // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. // See the comment in `Queue::present`. 
@@ -957,17 +965,14 @@ impl crate::Surface for super::Surface { } }; + sc.next_surface_index += 1; + sc.next_surface_index %= sc.surface_semaphores.len(); + // special case for Intel Vulkan returning bizzare values (ugh) if sc.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 { return Err(crate::SurfaceError::Outdated); } - let fences = &[sc.fence]; - - unsafe { sc.device.raw.wait_for_fences(fences, true, !0) } - .map_err(crate::DeviceError::from)?; - unsafe { sc.device.raw.reset_fences(fences) }.map_err(crate::DeviceError::from)?; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03209 let raw_flags = if sc .raw_flags @@ -994,6 +999,7 @@ impl crate::Surface for super::Surface { }, view_formats: sc.view_formats.clone(), }, + wait_semaphore, }; Ok(Some(crate::AcquiredSurfaceTexture { texture, diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 45deda5d5b3..787ebd72677 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -146,10 +146,14 @@ struct Swapchain { raw_flags: vk::SwapchainCreateFlagsKHR, functor: khr::Swapchain, device: Arc, - fence: vk::Fence, images: Vec, config: crate::SurfaceConfiguration, view_formats: Vec, + /// One wait semaphore per swapchain image, plus one spare. Each will be associated with a + /// surface texture, and later collected during submission. + surface_semaphores: Vec, + /// Current semaphore index to use when acquiring a surface. 
+ next_surface_index: usize, } pub struct Surface { @@ -163,6 +167,7 @@ pub struct Surface { pub struct SurfaceTexture { index: u32, texture: Texture, + wait_semaphore: vk::Semaphore, } impl Borrow for SurfaceTexture { @@ -585,29 +590,43 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + surface_textures: &[&SurfaceTexture], signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { - let vk_cmd_buffers = command_buffers - .iter() - .map(|cmd| cmd.raw) - .collect::>(); + let mut fence_raw = vk::Fence::null(); - let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + let mut wait_stage_masks = Vec::new(); + let mut wait_semaphores = Vec::new(); + let mut signal_semaphores = ArrayVec::<_, 2>::new(); + let mut signal_values = ArrayVec::<_, 2>::new(); - let mut fence_raw = vk::Fence::null(); - let mut vk_timeline_info; - let mut signal_semaphores = [vk::Semaphore::null(), vk::Semaphore::null()]; - let signal_values; + for &surface_texture in surface_textures { + wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE); + wait_semaphores.push(surface_texture.wait_semaphore); + } + + let old_index = self.relay_index.load(Ordering::Relaxed); + + let sem_index = if old_index >= 0 { + wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE); + wait_semaphores.push(self.relay_semaphores[old_index as usize]); + (old_index as usize + 1) % self.relay_semaphores.len() + } else { + 0 + }; + + signal_semaphores.push(self.relay_semaphores[sem_index]); + + self.relay_index + .store(sem_index as isize, Ordering::Relaxed); if let Some((fence, value)) = signal_fence { fence.maintain(&self.device.raw)?; match *fence { Fence::TimelineSemaphore(raw) => { - signal_values = [!0, value]; - signal_semaphores[1] = raw; - vk_timeline_info = vk::TimelineSemaphoreSubmitInfo::builder() - .signal_semaphore_values(&signal_values); - vk_info = vk_info.push_next(&mut vk_timeline_info); + 
signal_semaphores.push(raw); + signal_values.push(!0); + signal_values.push(value); } Fence::FencePool { ref mut active, @@ -627,26 +646,25 @@ impl crate::Queue for Queue { } } - let wait_stage_mask = [vk::PipelineStageFlags::TOP_OF_PIPE]; - let old_index = self.relay_index.load(Ordering::Relaxed); - let sem_index = if old_index >= 0 { - vk_info = vk_info - .wait_semaphores(&self.relay_semaphores[old_index as usize..old_index as usize + 1]) - .wait_dst_stage_mask(&wait_stage_mask); - (old_index as usize + 1) % self.relay_semaphores.len() - } else { - 0 - }; - self.relay_index - .store(sem_index as isize, Ordering::Relaxed); - signal_semaphores[0] = self.relay_semaphores[sem_index]; + let vk_cmd_buffers = command_buffers + .iter() + .map(|cmd| cmd.raw) + .collect::>(); - let signal_count = if signal_semaphores[1] == vk::Semaphore::null() { - 1 - } else { - 2 - }; - vk_info = vk_info.signal_semaphores(&signal_semaphores[..signal_count]); + let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + + vk_info = vk_info + .wait_semaphores(&wait_semaphores) + .wait_dst_stage_mask(&wait_stage_masks) + .signal_semaphores(&signal_semaphores); + + let mut vk_timeline_info; + + if !signal_values.is_empty() { + vk_timeline_info = + vk::TimelineSemaphoreSubmitInfo::builder().signal_semaphore_values(&signal_values); + vk_info = vk_info.push_next(&mut vk_timeline_info); + } profiling::scope!("vkQueueSubmit"); unsafe { diff --git a/wgpu-types/Cargo.toml b/wgpu-types/Cargo.toml index 90846873b23..7528b18c0a5 100644 --- a/wgpu-types/Cargo.toml +++ b/wgpu-types/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wgpu-types" -version = "0.18.0" +version = "0.19.0" authors = ["gfx-rs developers"] edition = "2021" description = "WebGPU types" diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 2b800a35c05..cfc2af253ee 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -5151,6 +5151,26 @@ pub struct SurfaceConfiguration { /// AutoNoVsync will 
gracefully do a designed sets of fallbacks if their primary modes are /// unsupported. pub present_mode: PresentMode, + /// Desired maximum number of frames that the presentation engine should queue in advance. + /// + /// This is a hint to the backend implementation and will always be clamped to the supported range. + /// As a consequence, either the maximum frame latency is set directly on the swap chain, + /// or waits on present are scheduled to avoid exceeding the maximum frame latency if supported, + /// or the swap chain size is set to (max-latency + 1). + /// + /// Defaults to 2 when created via `wgpu::Surface::get_default_config`. + /// + /// Typical values range from 1 to 3, but higher values are possible: + /// * Choose 2 or higher for potentially smoother frame display, as it allows at least one frame + /// to be queued up. This typically avoids starving the GPU's work queue. + /// Higher values are useful for achieving a constant flow of frames to the display under varying load. + /// * Choose 1 for low latency from frame recording to frame display. + /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU + /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`, + /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle). + /// It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>. + /// * A value of 0 is generally not supported and always clamped to a higher value. + pub desired_maximum_frame_latency: u32, /// Specifies how the alpha channel of the textures should be handled during compositing. pub alpha_mode: CompositeAlphaMode, /// Specifies what view formats will be allowed when calling create_view() on texture returned by get_current_texture(). 
@@ -5170,6 +5190,7 @@ impl SurfaceConfiguration { width: self.width, height: self.height, present_mode: self.present_mode, + desired_maximum_frame_latency: self.desired_maximum_frame_latency, alpha_mode: self.alpha_mode, view_formats: fun(self.view_formats.clone()), } diff --git a/wgpu/src/backend/mod.rs b/wgpu/src/backend/mod.rs index 02d9632efb3..9a0b7ef28a3 100644 --- a/wgpu/src/backend/mod.rs +++ b/wgpu/src/backend/mod.rs @@ -1,9 +1,23 @@ -#[cfg(webgpu)] +#[cfg(all(webgpu, web_sys_unstable_apis))] mod webgpu; -#[cfg(webgpu)] +#[cfg(all(webgpu, web_sys_unstable_apis))] pub(crate) use webgpu::{get_browser_gpu_property, ContextWebGpu}; +#[cfg(all(webgpu, not(web_sys_unstable_apis)))] +compile_error!( + "webgpu feature used without web_sys_unstable_apis config: +Here are some ways to resolve this: +* If you wish to use webgpu backend, create a .cargo/config.toml in the root of the repo containing: + [build] + rustflags = [ \"--cfg=web_sys_unstable_apis\" ] + rustdocflags = [ \"--cfg=web_sys_unstable_apis\" ] +* If you wish to disable webgpu backend and instead use webgl backend, change your wgpu Cargo.toml entry to: + wgpu = { version = \"\", default-features = false, features = [\"webgl\"] } +" +); + #[cfg(wgpu_core)] mod wgpu_core; + #[cfg(wgpu_core)] pub(crate) use wgpu_core::ContextWgpuCore; diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 1d4139e73fa..ddc133dd1e3 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -852,13 +852,10 @@ impl crate::Context for ContextWgpuCore { ShaderSource::Glsl { ref shader, stage, - ref defines, + defines, } => { // Parse the given shader code and store its representation. 
- let options = naga::front::glsl::Options { - stage, - defines: defines.clone(), - }; + let options = naga::front::glsl::Options { stage, defines }; let mut parser = naga::front::glsl::Frontend::default(); let module = parser.parse(&options, shader).unwrap(); diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index f12892dfb02..9b4af33cd61 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1812,7 +1812,7 @@ impl Instance { ); } - #[cfg(webgpu)] + #[cfg(all(webgpu, web_sys_unstable_apis))] { let is_only_available_backend = !cfg!(wgpu_core); let requested_webgpu = _instance_desc.backends.contains(Backends::BROWSER_WEBGPU); @@ -3086,7 +3086,7 @@ impl<'a> BufferSlice<'a> { /// this function directly hands you the ArrayBuffer that we mapped the data into in js. /// /// This is only available on WebGPU, on any other backends this will return `None`. - #[cfg(webgpu)] + #[cfg(all(webgpu, web_sys_unstable_apis))] pub fn get_mapped_range_as_array_buffer(&self) -> Option { self.buffer .context @@ -4738,6 +4738,12 @@ impl SurfaceTexture { /// Schedule this texture to be presented on the owning surface. /// /// Needs to be called after any work on the texture is scheduled via [`Queue::submit`]. + /// + /// # Platform dependent behavior + /// + /// On Wayland, `present` will attach a `wl_buffer` to the underlying `wl_surface` and commit the new surface + /// state. If it is desired to do things such as request a frame callback, scale the surface using the viewporter + /// or synchronize other double buffered state, then these operations should be done before the call to `present`. pub fn present(mut self) { self.presented = true; DynContext::surface_present( @@ -4793,6 +4799,7 @@ impl Surface<'_> { format: *caps.formats.get(0)?, width, height, + desired_maximum_frame_latency: 2, present_mode: *caps.present_modes.get(0)?, alpha_mode: wgt::CompositeAlphaMode::Auto, view_formats: vec![],