From 1103fccd591d7fff6d5361dcabf5a616be7288b1 Mon Sep 17 00:00:00 2001 From: Firestar99 <31222740+Firestar99@users.noreply.github.com> Date: Mon, 23 Sep 2024 17:04:40 +0200 Subject: [PATCH] Subgroup intrinsics (#14) * subgroup: add trait VectorOrScalar, representing either a vector or a scalar type * subgroup: added all non-uniform subgroup operations * subgroup: remove all target_feature cfgs, replaced with docs * subgroup: added all subgroupBarrier*() functions from glsl * subgroup: added non group-op tests * subgroup: fixed asm for instructions taking GROUP_OP generic * subgroup: added tests for group-op instructions * gitignore: added rustc-ice* error reports * subgroup: added test for subgroup buildins * subgroup: make SubgroupMask a struct to prevent implicit casts to and from UVec4 * subgroup: fixed clippy lints * subgroup: drop the `non_uniform` from all subgroup functions, matching glsl * changelog: add subgroup intrinsics PR * subgroup: make VectorOrScalar trait match discussions in https://github.com/EmbarkStudios/rust-gpu/pull/1030 * cleanup: remove internal type F32x2 for glam::Vec2 --------- Co-authored-by: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> --- .gitignore | 1 + CHANGELOG.md | 1 + .../src/builder/spirv_asm.rs | 19 +- crates/spirv-std/src/arch.rs | 2 + crates/spirv-std/src/arch/subgroup.rs | 2117 +++++++++++++++++ crates/spirv-std/src/float.rs | 16 +- crates/spirv-std/src/scalar.rs | 32 +- crates/spirv-std/src/vector.rs | 59 +- tests/ui/arch/all.rs | 10 +- tests/ui/arch/any.rs | 10 +- tests/ui/arch/subgroup/subgroup_ballot.rs | 17 + tests/ui/arch/subgroup/subgroup_ballot.stderr | 10 + .../subgroup/subgroup_ballot_bit_count.rs | 17 + .../subgroup/subgroup_ballot_bit_count.stderr | 8 + .../arch/subgroup/subgroup_broadcast_first.rs | 17 + .../subgroup/subgroup_broadcast_first.stderr | 8 + tests/ui/arch/subgroup/subgroup_builtins.rs | 17 + tests/ui/arch/subgroup/subgroup_elect.rs | 16 + tests/ui/arch/subgroup/subgroup_elect.stderr | 7 
+ .../arch/subgroup/subgroup_i_add_clustered.rs | 18 + .../subgroup/subgroup_i_add_clustered.stderr | 8 + .../subgroup/subgroup_i_add_exclusive_scan.rs | 18 + .../subgroup_i_add_exclusive_scan.stderr | 8 + .../subgroup/subgroup_i_add_inclusive_scan.rs | 18 + .../subgroup_i_add_inclusive_scan.stderr | 8 + .../ui/arch/subgroup/subgroup_i_add_reduce.rs | 18 + .../subgroup/subgroup_i_add_reduce.stderr | 8 + 27 files changed, 2437 insertions(+), 51 deletions(-) create mode 100644 crates/spirv-std/src/arch/subgroup.rs create mode 100644 tests/ui/arch/subgroup/subgroup_ballot.rs create mode 100644 tests/ui/arch/subgroup/subgroup_ballot.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_ballot_bit_count.rs create mode 100644 tests/ui/arch/subgroup/subgroup_ballot_bit_count.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_broadcast_first.rs create mode 100644 tests/ui/arch/subgroup/subgroup_broadcast_first.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_builtins.rs create mode 100644 tests/ui/arch/subgroup/subgroup_elect.rs create mode 100644 tests/ui/arch/subgroup/subgroup_elect.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_clustered.rs create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_clustered.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.rs create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.rs create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_reduce.rs create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_reduce.stderr diff --git a/.gitignore b/.gitignore index d93c2cc44c..410633c8d7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ target/ .vim/ tests/Cargo.lock .github/install-spirv-tools/Cargo.lock +rustc-ice-*.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index dba2a620c4..2b63ba5323 100644 
--- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Signed for loops like `for _ in 0..4i32 {}` no longer compile. We recommend switching to unsigned for loops and casting back to signed integers in the meanwhile. ### Changed 🛠 +- [PR#14](https://github.com/Rust-GPU/rust-gpu/pull/14) add subgroup intrinsics matching glsl's [`GL_KHR_shader_subgroup`](https://github.com/KhronosGroup/GLSL/blob/main/extensions/khr/GL_KHR_shader_subgroup.txt) - [PR#13](https://github.com/Rust-GPU/rust-gpu/pull/13) allow cargo features to be passed to the shader crate - [PR#12](https://github.com/rust-gpu/rust-gpu/pull/12) updated toolchain to `nightly-2024-04-24` - [PR#9](https://github.com/Rust-GPU/rust-gpu/pull/9) relaxed `glam` version requirements (`>=0.22, <=0.29`) diff --git a/crates/rustc_codegen_spirv/src/builder/spirv_asm.rs b/crates/rustc_codegen_spirv/src/builder/spirv_asm.rs index 623893e465..a8328dbec0 100644 --- a/crates/rustc_codegen_spirv/src/builder/spirv_asm.rs +++ b/crates/rustc_codegen_spirv/src/builder/spirv_asm.rs @@ -2,11 +2,13 @@ use super::Builder; use crate::builder_spirv::{BuilderCursor, SpirvValue}; use crate::codegen_cx::CodegenCx; use crate::spirv_type::SpirvType; +use num_traits::FromPrimitive; use rspirv::dr; use rspirv::grammar::{reflect, LogicalOperand, OperandKind, OperandQuantifier}; use rspirv::spirv::{ - FPFastMathMode, FragmentShadingRate, FunctionControl, ImageOperands, KernelProfilingInfo, - LoopControl, MemoryAccess, MemorySemantics, Op, RayFlags, SelectionControl, StorageClass, Word, + FPFastMathMode, FragmentShadingRate, FunctionControl, GroupOperation, ImageOperands, + KernelProfilingInfo, LoopControl, MemoryAccess, MemorySemantics, Op, RayFlags, + SelectionControl, StorageClass, Word, }; use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece}; use rustc_codegen_ssa::mir::place::PlaceRef; @@ -1347,10 +1349,15 @@ impl<'cx, 'tcx> Builder<'cx, 'tcx> { 
Ok(x) => inst.operands.push(dr::Operand::Scope(x)), Err(()) => self.err(format!("unknown Scope {word}")), }, - (OperandKind::GroupOperation, Some(word)) => match word.parse() { - Ok(x) => inst.operands.push(dr::Operand::GroupOperation(x)), - Err(()) => self.err(format!("unknown GroupOperation {word}")), - }, + (OperandKind::GroupOperation, Some(word)) => { + match word.parse::<u32>().ok().and_then(GroupOperation::from_u32) { + Some(id) => inst.operands.push(dr::Operand::GroupOperation(id)), + None => match word.parse() { + Ok(x) => inst.operands.push(dr::Operand::GroupOperation(x)), + Err(()) => self.err(format!("unknown GroupOperation {word}")), + }, + } + } (OperandKind::KernelEnqueueFlags, Some(word)) => match word.parse() { Ok(x) => inst.operands.push(dr::Operand::KernelEnqueueFlags(x)), Err(()) => self.err(format!("unknown KernelEnqueueFlags {word}")), diff --git a/crates/spirv-std/src/arch.rs b/crates/spirv-std/src/arch.rs index 0fa43fea0f..dc061c7a00 100644 --- a/crates/spirv-std/src/arch.rs +++ b/crates/spirv-std/src/arch.rs @@ -19,6 +19,7 @@ mod demote_to_helper_invocation_ext; mod derivative; mod primitive; mod ray_tracing; +mod subgroup; pub use atomics::*; pub use barrier::*; @@ -26,6 +27,7 @@ pub use demote_to_helper_invocation_ext::*; pub use derivative::*; pub use primitive::*; pub use ray_tracing::*; +pub use subgroup::*; /// Result is true if any component of `vector` is true, otherwise result is /// false. 
diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs new file mode 100644 index 0000000000..b587f0e0ad --- /dev/null +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -0,0 +1,2117 @@ +#[cfg(target_arch = "spirv")] +use crate::arch::barrier; +use crate::float::Float; +use crate::integer::{Integer, SignedInteger, UnsignedInteger}; +#[cfg(target_arch = "spirv")] +use crate::memory::{Scope, Semantics}; +use crate::vector::VectorOrScalar; +#[cfg(target_arch = "spirv")] +use core::arch::asm; + +#[cfg(target_arch = "spirv")] +const SUBGROUP: u32 = Scope::Subgroup as u32; + +/// `SubgroupMask` is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup. +/// Mostly used in group ballot operations. +#[derive(Copy, Clone, Default, Eq, PartialEq)] +pub struct SubgroupMask(pub glam::UVec4); + +/// Defines the class of group operation. +#[non_exhaustive] +#[derive(Debug, PartialEq, Eq)] +pub enum GroupOperation { + /// A reduction operation for all values of a specific value X specified by invocations within a workgroup. + Reduce = 0, + /// A binary operation with an identity I and n (where n is the size of the workgroup) + /// elements[a0, a1, … an-1] resulting in [a0, (a0 op a1), …(a0 op a1 op … op an-1)] + InclusiveScan = 1, + /// A binary operation with an identity I and n (where n is the size of the workgroup) + /// elements[a0, a1, … an-1] resulting in [I, a0, (a0 op a1), … (a0 op a1 op … op an-2)]. + ExclusiveScan = 2, + // /// See [`GROUP_OPERATION_CLUSTERED_REDUCE`] + // ClusteredReduce = 3, + /// Reserved. + /// + /// Requires Capability `GroupNonUniformPartitionedNV`. + PartitionedReduceNV = 6, + /// Reserved. + /// + /// Requires Capability `GroupNonUniformPartitionedNV`. + PartitionedInclusiveScanNV = 7, + /// Reserved. + /// + /// Requires Capability `GroupNonUniformPartitionedNV`. + PartitionedExclusiveScanNV = 8, +} + +/// The [`GroupOperation`] `ClusteredReduce`. 
+/// +/// All instructions with a [`GroupOperation`] require an additional `ClusterSize` parameter when [`GroupOperation`] is +/// `ClusteredReduce`. To map this requirement into rust, all function have a base version accepting [`GroupOperation`] +/// as a const generic, and a `_clustered` variant that is fixed to `ClusteredReduce` and takes the additional +/// `ClusterSize` parameter as a const generic. To not accidentally use a `ClusteredReduce` in the base variant of the +/// function, it was removed from the [`GroupOperation`] enum and instead resides individually. +pub const GROUP_OPERATION_CLUSTERED_REDUCE: u32 = 3; + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupBarrier() enforces that all active invocations within a +/// subgroup must execute this function before any are allowed to continue their +/// execution, and the results of any memory stores performed using coherent +/// variables performed prior to the call will be visible to any future +/// coherent access to the same memory performed by any other shader invocation +/// within the same subgroup. +/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupBarrier")] +#[inline] +pub unsafe fn subgroup_barrier() { + unsafe { + barrier::control_barrier::< + SUBGROUP, + SUBGROUP, + { + Semantics::ACQUIRE_RELEASE.bits() + | Semantics::UNIFORM_MEMORY.bits() + | Semantics::WORKGROUP_MEMORY.bits() + | Semantics::IMAGE_MEMORY.bits() + }, + >(); + } +} + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupMemoryBarrier() enforces the ordering of all memory +/// transactions issued within a single shader invocation, as viewed by other +/// invocations in the same subgroup. +/// +/// Requires Capability `GroupNonUniform`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupMemoryBarrier")] +#[inline] +pub unsafe fn subgroup_memory_barrier() { + unsafe { + barrier::memory_barrier::< + SUBGROUP, + { + Semantics::ACQUIRE_RELEASE.bits() + | Semantics::UNIFORM_MEMORY.bits() + | Semantics::WORKGROUP_MEMORY.bits() + | Semantics::IMAGE_MEMORY.bits() + }, + >(); + } +} + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupMemoryBarrierBuffer() enforces the ordering of all +/// memory transactions to buffer variables issued within a single shader +/// invocation, as viewed by other invocations in the same subgroup. +/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupMemoryBarrierBuffer")] +#[inline] +pub unsafe fn subgroup_memory_barrier_buffer() { + unsafe { + barrier::memory_barrier::< + SUBGROUP, + { Semantics::ACQUIRE_RELEASE.bits() | Semantics::UNIFORM_MEMORY.bits() }, + >(); + } +} + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupMemoryBarrierShared() enforces the ordering of all +/// memory transactions to shared variables issued within a single shader +/// invocation, as viewed by other invocations in the same subgroup. +/// +/// Only available in compute shaders. +/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupMemoryBarrierShared")] +#[inline] +pub unsafe fn subgroup_memory_barrier_shared() { + unsafe { + barrier::memory_barrier::< + SUBGROUP, + { Semantics::ACQUIRE_RELEASE.bits() | Semantics::WORKGROUP_MEMORY.bits() }, + >(); + } +} + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupMemoryBarrierImage() enforces the ordering of all +/// memory transactions to images issued within a single shader invocation, as +/// viewed by other invocations in the same subgroup. 
+/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupMemoryBarrierImage")] +#[inline] +pub unsafe fn subgroup_memory_barrier_image() { + unsafe { + barrier::memory_barrier::< + SUBGROUP, + { Semantics::ACQUIRE_RELEASE.bits() | Semantics::IMAGE_MEMORY.bits() }, + >(); + } +} + +/// Result is true only in the active invocation with the lowest id in the group, otherwise result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformElect")] +#[inline] +pub unsafe fn subgroup_elect() -> bool { + let mut result = false; + + unsafe { + asm! { + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%result = OpGroupNonUniformElect %bool %subgroup", + "OpStore {result} %result", + subgroup = const SUBGROUP, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a predicate for all active invocations in the group, resulting in true if predicate evaluates to true for all active invocations in the group, otherwise the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Predicate must be a Boolean type. +/// +/// Requires Capability `GroupNonUniformVote`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformAll")] +#[inline] +pub unsafe fn subgroup_all(predicate: bool) -> bool { + let mut result = false; + + unsafe { + asm! 
{ + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%predicate = OpLoad _ {predicate}", + "%result = OpGroupNonUniformAll %bool %subgroup %predicate", + "OpStore {result} %result", + subgroup = const SUBGROUP, + predicate = in(reg) &predicate, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a predicate for all active invocations in the group, resulting in true if predicate evaluates to true for any active invocation in the group, otherwise the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Predicate must be a Boolean type. +/// +/// Requires Capability `GroupNonUniformVote`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformAny")] +#[inline] +pub unsafe fn subgroup_any(predicate: bool) -> bool { + let mut result = false; + + unsafe { + asm! { + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%predicate = OpLoad _ {predicate}", + "%result = OpGroupNonUniformAny %bool %subgroup %predicate", + "OpStore {result} %result", + subgroup = const SUBGROUP, + predicate = in(reg) &predicate, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a value for all active invocations in the group. The result is true if Value is equal for all active invocations in the group. Otherwise, the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a scalar or vector of floating-point type, integer type, or Boolean type. The compare operation is based on this type, and if it is a floating-point type, an ordered-and-equal compare is used. +/// +/// Requires Capability `GroupNonUniformVote`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformAllEqual")] +#[inline] +pub unsafe fn subgroup_all_equal<T: VectorOrScalar>(value: T) -> bool { + let mut result = false; + + unsafe { + asm! { + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformAllEqual %bool %subgroup %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the id Id to all active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Id must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Before version 1.5, Id must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when Id is not dynamically uniform. +/// +/// The resulting value is undefined if Id is an inactive invocation, or is greater than or equal to the size of the group. +/// +/// Requires Capability `GroupNonUniformBallot`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBroadcast")] +#[inline] +pub unsafe fn subgroup_broadcast<T: VectorOrScalar>(value: T, id: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%id = OpLoad _ {id}", + "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + id = in(reg) &id, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation from the active invocation with the lowest id in the group to all active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformBallot`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBroadcastFirst")] +#[inline] +pub unsafe fn subgroup_broadcast_first<T: VectorOrScalar>(value: T) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformBroadcastFirst _ %subgroup %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is a bitfield value combining the Predicate value from all invocations in the group that execute the same dynamic instance of this instruction. The bit is set to one if the corresponding invocation is active and the Predicate for that invocation evaluated to true; otherwise, it is set to zero. +/// +/// Result Type must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. 
+/// +/// Result is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. +/// +/// Predicate must be a Boolean type. +/// +/// Requires Capability `GroupNonUniformBallot`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallot")] +#[inline] +pub unsafe fn subgroup_ballot(predicate: bool) -> SubgroupMask { + let mut result = SubgroupMask::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%groupmask = OpTypeVector %u32 4", + "%subgroup = OpConstant %u32 {subgroup}", + "%predicate = OpLoad _ {predicate}", + "%result = OpGroupNonUniformBallot %groupmask %subgroup %predicate", + "OpStore {result} %result", + subgroup = const SUBGROUP, + predicate = in(reg) &predicate, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a value for all active invocations in the group, resulting in true if the bit in Value for the corresponding invocation is set to one, otherwise the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Behavior is undefined unless Value is the same for all invocations that execute the same dynamic instance of this instruction. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +/// +/// Requires Capability `GroupNonUniformBallot`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformInverseBallot")] +#[inline] +pub unsafe fn subgroup_inverse_ballot(subgroup_mask: SubgroupMask) -> bool { + let mut result = false; + + unsafe { + asm! { + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%result = OpGroupNonUniformInverseBallot %bool %subgroup %subgroup_mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + subgroup_mask = in(reg) &subgroup_mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a value for all active invocations in the group, resulting in true if the bit in Value that corresponds to Index is set to one, otherwise the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +/// +/// Index must be a scalar of integer type, whose Signedness operand is 0. +/// +/// The resulting value is undefined if Index is greater than or equal to the size of the group. +/// +/// Requires Capability `GroupNonUniformBallot`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallotBitExtract")] +#[inline] +pub unsafe fn subgroup_ballot_bit_extract(subgroup_mask: SubgroupMask, id: u32) -> bool { + let mut result = false; + + unsafe { + asm! 
{ + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%id = OpLoad _ {id}", + "%result = OpGroupNonUniformBallotBitExtract %bool %subgroup %subgroup_mask %id", + "OpStore {result} %result", + subgroup = const SUBGROUP, + subgroup_mask = in(reg) &subgroup_mask, + id = in(reg) &id, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the number of bits that are set to 1 in Value, considering only the bits in Value required to represent all bits of the group's invocations. +/// +/// Result Type must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +/// +/// Requires Capability `GroupNonUniformBallot`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallotBitCount")] +#[inline] +pub unsafe fn subgroup_ballot_bit_count<const GROUP_OP: u32>(subgroup_mask: SubgroupMask) -> u32 { + let mut result = 0; + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup {groupop} %subgroup_mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + subgroup_mask = in(reg) &subgroup_mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Find the least significant bit set to 1 in Value, considering only the bits in Value required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined. +/// +/// Result Type must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +/// +/// Requires Capability `GroupNonUniformBallot`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallotFindLSB")] +#[inline] +pub unsafe fn subgroup_ballot_find_lsb(subgroup_mask: SubgroupMask) -> u32 { + let mut result = 0; + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%result = OpGroupNonUniformBallotFindLSB %u32 %subgroup %subgroup_mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + subgroup_mask = in(reg) &subgroup_mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Find the most significant bit set to 1 in Value, considering only the bits in Value required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined. +/// +/// Result Type must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +/// +/// Requires Capability `GroupNonUniformBallot`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallotFindMSB")] +#[inline] +pub unsafe fn subgroup_ballot_find_msb(subgroup_mask: SubgroupMask) -> u32 { + let mut result = 0; + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%result = OpGroupNonUniformBallotFindMSB %u32 %subgroup %subgroup_mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + subgroup_mask = in(reg) &subgroup_mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the id Id. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. 
+/// +/// Execution is a Scope that identifies the group of invocations affected by this command. +/// +/// The type of Value must be the same as Result Type. +/// +/// Id must be a scalar of integer type, whose Signedness operand is 0. +/// +/// The resulting value is undefined if Id is an inactive invocation, or is greater than or equal to the size of the group. +/// +/// Requires Capability `GroupNonUniformShuffle`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformShuffle")] +#[inline] +pub unsafe fn subgroup_shuffle<T: VectorOrScalar>(value: T, id: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%id = OpLoad _ {id}", + "%result = OpGroupNonUniformShuffle _ %subgroup %value %id", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + id = in(reg) &id, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the current invocation’s id within the group xor’ed with Mask. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Mask must be a scalar of integer type, whose Signedness operand is 0. +/// +/// The resulting value is undefined if current invocation’s id within the group xor’ed with Mask is an inactive invocation, or is greater than or equal to the size of the group. +/// +/// Requires Capability `GroupNonUniformShuffle`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformShuffleXor")] +#[inline] +pub unsafe fn subgroup_shuffle_xor<T: VectorOrScalar>(value: T, mask: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%mask = OpLoad _ {mask}", + "%result = OpGroupNonUniformShuffleXor _ %subgroup %value %mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + mask = in(reg) &mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the current invocation’s id within the group - Delta. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Delta must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than the current invocation’s id within the group or if the selected lane is inactive. +/// +/// Requires Capability `GroupNonUniformShuffleRelative`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformShuffleUp")] +#[inline] +pub unsafe fn subgroup_shuffle_up<T: VectorOrScalar>(value: T, delta: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%delta = OpLoad _ {delta}", + "%result = OpGroupNonUniformShuffleUp _ %subgroup %value %delta", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + delta = in(reg) &delta, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the current invocation’s id within the group + Delta. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. 
+/// +/// The type of Value must be the same as Result Type. +/// +/// Delta must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than or equal to the size of the group, or if the current invocation’s id within the group + Delta is either an inactive invocation or greater than or equal to the size of the group. +/// +/// Requires Capability `GroupNonUniformShuffleRelative`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformShuffleDown")] +#[inline] +pub unsafe fn subgroup_shuffle_down(value: T, delta: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%delta = OpLoad _ {delta}", + "%result = OpGroupNonUniformShuffleDown _ %subgroup %value %delta", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + delta = in(reg) &delta, + result = in(reg) &mut result, + } + } + + result +} + +/// An integer add group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformIAdd")] +#[inline] +pub unsafe fn subgroup_i_add>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformIAdd _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An integer add group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformIAdd")] +#[inline] +pub unsafe fn subgroup_i_add_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformIAdd _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point add group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFAdd")] +#[inline] +pub unsafe fn subgroup_f_add>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformFAdd _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point add group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. 
+/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFAdd")] +#[inline] +pub unsafe fn subgroup_f_add_clustered< + const CLUSTER_SIZE: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformFAdd _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An integer multiply group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 1. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformIMul")] +#[inline] +pub unsafe fn subgroup_i_mul>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformIMul _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An integer multiply group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 1. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformIMul")] +#[inline] +pub unsafe fn subgroup_i_mul_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformIMul _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point multiply group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 1. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMul")] +#[inline] +pub unsafe fn subgroup_f_mul>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformFMul _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point multiply group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 1. 
If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMul")] +#[inline] +pub unsafe fn subgroup_f_mul_clustered< + const CLUSTER_SIZE: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformFMul _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A signed integer minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is INT_MAX. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformSMin")] +#[inline] +pub unsafe fn subgroup_s_min< + const GROUP_OP: u32, + S: VectorOrScalar, +>( + value: S, +) -> S { + let mut result = S::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformSMin _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A signed integer minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is INT_MAX. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformSMin")] +#[inline] +pub unsafe fn subgroup_s_min_clustered< + const CLUSTER_SIZE: u32, + S: VectorOrScalar, +>( + value: S, +) -> S { + let mut result = S::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformSMin _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An unsigned integer minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is UINT_MAX. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformUMin")] +#[inline] +pub unsafe fn subgroup_u_min< + const GROUP_OP: u32, + U: VectorOrScalar, +>( + value: U, +) -> U { + let mut result = U::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformUMin _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An unsigned integer minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is UINT_MAX. If Operation is ClusteredReduce, ClusterSize must be present. 
+/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformUMin")] +#[inline] +pub unsafe fn subgroup_u_min_clustered< + const CLUSTER_SIZE: u32, + U: VectorOrScalar, +>( + value: U, +) -> U { + let mut result = U::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformUMin _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is +INF. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. 
If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMin")] +#[inline] +pub unsafe fn subgroup_f_min>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformFMin _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is +INF. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMin")] +#[inline] +pub unsafe fn subgroup_f_min_clustered< + const CLUSTER_SIZE: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformFMin _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A signed integer maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is INT_MIN. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformSMax")] +#[inline] +pub unsafe fn subgroup_s_max< + const GROUP_OP: u32, + S: VectorOrScalar, +>( + value: S, +) -> S { + let mut result = S::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformSMax _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A signed integer maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. 
+/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is INT_MIN. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformSMax")] +#[inline] +pub unsafe fn subgroup_s_max_clustered< + const CLUSTER_SIZE: u32, + S: VectorOrScalar, +>( + value: S, +) -> S { + let mut result = S::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformSMax _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An unsigned integer maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformUMax")] +#[inline] +pub unsafe fn subgroup_u_max< + const GROUP_OP: u32, + U: VectorOrScalar, +>( + value: U, +) -> U { + let mut result = U::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformUMax _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An unsigned integer maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformUMax")] +#[inline] +pub unsafe fn subgroup_u_max_clustered< + const CLUSTER_SIZE: u32, + U: VectorOrScalar, +>( + value: U, +) -> U { + let mut result = U::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformUMax _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is -INF. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMax")] +#[inline] +pub unsafe fn subgroup_f_max>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformFMax _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point maximum group operation of all Value operands contributed by active invocations in the group.
+/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is -INF. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMax")] +#[inline] +pub unsafe fn subgroup_f_max_clustered< + const CLUSTER_SIZE: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformFMax _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise and group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is ~0. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`.
+#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseAnd")] +#[inline] +pub unsafe fn subgroup_bitwise_and< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformBitwiseAnd _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise and group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is ~0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseAnd")] +#[inline] +pub unsafe fn subgroup_bitwise_and_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformBitwiseAnd _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise or group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseOr")] +#[inline] +pub unsafe fn subgroup_bitwise_or>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformBitwiseOr _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise or group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. 
ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseOr")] +#[inline] +pub unsafe fn subgroup_bitwise_or_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformBitwiseOr _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise xor group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseXor")] +#[inline] +pub unsafe fn subgroup_bitwise_xor< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformBitwiseXor _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise xor group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseXor")] +#[inline] +pub unsafe fn subgroup_bitwise_xor_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformBitwiseXor _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical and group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is ~0. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalAnd")] +#[inline] +pub unsafe fn subgroup_logical_and>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformLogicalAnd _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical and group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is ~0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. 
ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalAnd")] +#[inline] +pub unsafe fn subgroup_logical_and_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformLogicalAnd _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical or group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalOr")] +#[inline] +pub unsafe fn subgroup_logical_or>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformLogicalOr _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical or group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalOr")] +#[inline] +pub unsafe fn subgroup_logical_or_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformLogicalOr _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical xor group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +/// +/// Requires Capability `GroupNonUniformArithmetic`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalXor")] +#[inline] +pub unsafe fn subgroup_logical_xor>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformLogicalXor _ %subgroup {groupop} %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical xor group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. 
ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalXor")] +#[inline] +pub unsafe fn subgroup_logical_xor_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformLogicalXor _ %subgroup {groupop} %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation within the quad with a quad index equal to Index. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Index must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Before version 1.5, Index must come from a constant instruction. Starting with version 1.5, Index must be dynamically uniform. +/// +/// If the value of Index is greater than or equal to 4, or refers to an inactive invocation, the resulting value is undefined. +/// +/// Requires Capability `GroupNonUniformQuad`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformQuadBroadcast")] +#[inline] +pub unsafe fn subgroup_quad_broadcast(value: T, id: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%id = OpLoad _ {id}", + "%result = OpGroupNonUniformQuadBroadcast _ %subgroup %value %id", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + id = in(reg) &id, + result = in(reg) &mut result, + } + } + + result +} + +/// Direction is the kind of swap to perform. +/// +/// Direction must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Direction must come from a constant instruction. +/// +/// The value returned in Result is the value provided to Value by another invocation in the same quad scope instance. The invocation providing this value is determined according to Direction. +/// +/// Requires Capability `GroupNonUniformQuad`. +pub enum QuadDirection { + /// A Direction of 0 indicates a horizontal swap; + /// - Invocations with quad indices of 0 and 1 swap values + /// - Invocations with quad indices of 2 and 3 swap values + Horizontal = 0, + /// A Direction of 1 indicates a vertical swap; + /// - Invocations with quad indices of 0 and 2 swap values + /// - Invocations with quad indices of 1 and 3 swap values + Vertical = 1, + /// A Direction of 2 indicates a diagonal swap; + /// - Invocations with quad indices of 0 and 3 swap values + /// - Invocations with quad indices of 1 and 2 swap values + Diagonal = 2, +} + +/// Swap the Value of the invocation within the quad with another invocation in the quad using Direction. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. 
+/// +/// Direction is the kind of swap to perform. +/// +/// Direction must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Direction must come from a constant instruction. +/// +/// The value returned in Result is the value provided to Value by another invocation in the same quad scope instance. The invocation providing this value is determined according to Direction. +/// +/// A Direction of 0 indicates a horizontal swap; +/// - Invocations with quad indices of 0 and 1 swap values +/// - Invocations with quad indices of 2 and 3 swap values +/// A Direction of 1 indicates a vertical swap; +/// - Invocations with quad indices of 0 and 2 swap values +/// - Invocations with quad indices of 1 and 3 swap values +/// A Direction of 2 indicates a diagonal swap; +/// - Invocations with quad indices of 0 and 3 swap values +/// - Invocations with quad indices of 1 and 2 swap values +/// +/// Direction must be one of the above values. +/// +/// If an active invocation reads Value from an inactive invocation, the resulting value is undefined. +/// +/// Requires Capability `GroupNonUniformQuad`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformQuadSwap")] +#[inline] +pub unsafe fn subgroup_quad_swap(value: T) -> T { + let mut result = T::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%direction = OpConstant %u32 {direction}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformQuadSwap _ %subgroup %value %direction", + "OpStore {result} %result", + subgroup = const SUBGROUP, + direction = const DIRECTION, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} diff --git a/crates/spirv-std/src/float.rs b/crates/spirv-std/src/float.rs index 0a685b2468..aaf85c5051 100644 --- a/crates/spirv-std/src/float.rs +++ b/crates/spirv-std/src/float.rs @@ -61,30 +61,18 @@ pub fn f16x2_to_vec2>(int: u32) -> V { result } -// We don't have access to a concrete vector type (cfg(feature = "glam") might not be enabled), so -// synth up one manually. -#[cfg_attr(target_arch = "spirv", repr(simd))] -// sometimes dead because on cpu, the `gpu_only` macro nukes the method bodies -#[allow(dead_code)] -#[derive(Default)] -struct F32x2 { - x: f32, - y: f32, -} -unsafe impl Vector for F32x2 {} - /// Converts an f32 (float) into an f16 (half). The result is a u32, not a u16, due to GPU support /// for u16 not being universal - the upper 16 bits will always be zero. #[spirv_std_macros::gpu_only] pub fn f32_to_f16(float: f32) -> u32 { - vec2_to_f16x2(F32x2 { x: float, y: 0.0 }) + vec2_to_f16x2(glam::Vec2::new(float, 0.)) } /// Converts an f16 (half) into an f32 (float). The parameter is a u32, due to GPU support for u16 /// not being universal - the upper 16 bits are ignored. #[spirv_std_macros::gpu_only] pub fn f16_to_f32(packed: u32) -> f32 { - f16x2_to_vec2::(packed).x + f16x2_to_vec2::(packed).x } /// Packs a vec4 into 4 8-bit signed integers. See diff --git a/crates/spirv-std/src/scalar.rs b/crates/spirv-std/src/scalar.rs index 34d1f5db8c..e9ab3ae758 100644 --- a/crates/spirv-std/src/scalar.rs +++ b/crates/spirv-std/src/scalar.rs @@ -1,20 +1,28 @@ //! Traits related to scalars. 
+use crate::vector::{create_dim, VectorOrScalar}; +use core::num::NonZeroUsize; + /// Abstract trait representing a SPIR-V scalar type. /// /// # Safety /// Implementing this trait on non-scalar types breaks assumptions of other unsafe code, and should /// not be done. -pub unsafe trait Scalar: Copy + Default + crate::sealed::Sealed {} +pub unsafe trait Scalar: + VectorOrScalar + Copy + Default + crate::sealed::Sealed +{ +} + +macro_rules! impl_scalar { + ($($ty:ty),+) => { + $( + unsafe impl VectorOrScalar for $ty { + type Scalar = Self; + const DIM: NonZeroUsize = create_dim(1); + } + unsafe impl Scalar for $ty {} + )+ + }; +} -unsafe impl Scalar for bool {} -unsafe impl Scalar for f32 {} -unsafe impl Scalar for f64 {} -unsafe impl Scalar for u8 {} -unsafe impl Scalar for u16 {} -unsafe impl Scalar for u32 {} -unsafe impl Scalar for u64 {} -unsafe impl Scalar for i8 {} -unsafe impl Scalar for i16 {} -unsafe impl Scalar for i32 {} -unsafe impl Scalar for i64 {} +impl_scalar!(bool, f32, f64, u8, u16, u32, u64, i8, i16, i32, i64); diff --git a/crates/spirv-std/src/vector.rs b/crates/spirv-std/src/vector.rs index 19cdb144b9..668f40bd32 100644 --- a/crates/spirv-std/src/vector.rs +++ b/crates/spirv-std/src/vector.rs @@ -1,33 +1,58 @@ //! Traits related to vectors. +use crate::scalar::Scalar; +use core::num::NonZeroUsize; use glam::{Vec3Swizzles, Vec4Swizzles}; +/// Abstract trait representing either a vector or a scalar type. +/// +/// # Safety +/// Implementing this trait on non-scalar or non-vector types may break assumptions about other +/// unsafe code, and should not be done. +pub unsafe trait VectorOrScalar: Default { + /// Either the scalar component type of the vector or the scalar itself. 
+ type Scalar: Scalar; + + /// The dimension of the vector, or 1 if it is a scalar + const DIM: NonZeroUsize; +} + +/// replace with `NonZeroUsize::new(n).unwrap()` once `unwrap()` is const stabilized +pub(crate) const fn create_dim(n: usize) -> NonZeroUsize { + match NonZeroUsize::new(n) { + None => panic!("dim must not be 0"), + Some(n) => n, + } +} + /// Abstract trait representing a SPIR-V vector type. /// /// # Safety /// Implementing this trait on non-simd-vector types breaks assumptions of other unsafe code, and /// should not be done. -pub unsafe trait Vector: Default {} +pub unsafe trait Vector: VectorOrScalar {} -unsafe impl Vector for glam::Vec2 {} -unsafe impl Vector for glam::Vec3 {} -unsafe impl Vector for glam::Vec3A {} -unsafe impl Vector for glam::Vec4 {} - -unsafe impl Vector for glam::DVec2 {} -unsafe impl Vector for glam::DVec3 {} -unsafe impl Vector for glam::DVec4 {} - -unsafe impl Vector for glam::UVec2 {} -unsafe impl Vector for glam::UVec3 {} -unsafe impl Vector for glam::UVec4 {} +macro_rules! impl_vector { + ($($scalar:ty: $($vec:ty => $dim:literal),+;)+) => { + $($( + unsafe impl VectorOrScalar for $vec { + type Scalar = $scalar; + const DIM: NonZeroUsize = create_dim($dim); + } + unsafe impl Vector<$scalar, $dim> for $vec {} + )+)+ + }; +} -unsafe impl Vector for glam::IVec2 {} -unsafe impl Vector for glam::IVec3 {} -unsafe impl Vector for glam::IVec4 {} +impl_vector! 
{ + f32: glam::Vec2 => 2, glam::Vec3 => 3, glam::Vec3A => 3, glam::Vec4 => 4; + f64: glam::DVec2 => 2, glam::DVec3 => 3, glam::DVec4 => 4; + u32: glam::UVec2 => 2, glam::UVec3 => 3, glam::UVec4 => 4; + i32: glam::IVec2 => 2, glam::IVec3 => 3, glam::IVec4 => 4; +} /// Trait that implements slicing of a vector into a scalar or vector of lower dimensions, by -/// ignoring the highter dimensions +/// ignoring the higher dimensions pub trait VectorTruncateInto { /// Slices the vector into a lower dimensional type by ignoring the higher components fn truncate_into(self) -> T; diff --git a/tests/ui/arch/all.rs b/tests/ui/arch/all.rs index fd0d5e51db..472a2d82a0 100644 --- a/tests/ui/arch/all.rs +++ b/tests/ui/arch/all.rs @@ -2,8 +2,9 @@ #![feature(repr_simd)] +use core::num::NonZeroUsize; use spirv_std::spirv; -use spirv_std::{scalar::Scalar, vector::Vector}; +use spirv_std::{scalar::Scalar, vector::Vector, vector::VectorOrScalar}; /// HACK(shesp). Rust doesn't allow us to declare regular (tuple-)structs containing `bool` members /// as `#[repl(simd)]`. But we need this for `spirv_std::arch::any()` and `spirv_std::arch::all()` @@ -12,6 +13,13 @@ use spirv_std::{scalar::Scalar, vector::Vector}; /// it (for now at least) #[repr(simd)] struct Vec2(T, T); +unsafe impl VectorOrScalar for Vec2 { + type Scalar = T; + const DIM: NonZeroUsize = match NonZeroUsize::new(2) { + None => panic!(), + Some(n) => n, + }; +} unsafe impl Vector for Vec2 {} impl Default for Vec2 { diff --git a/tests/ui/arch/any.rs b/tests/ui/arch/any.rs index 29cdc14626..c61928fed9 100644 --- a/tests/ui/arch/any.rs +++ b/tests/ui/arch/any.rs @@ -2,8 +2,9 @@ #![feature(repr_simd)] +use core::num::NonZeroUsize; use spirv_std::spirv; -use spirv_std::{scalar::Scalar, vector::Vector}; +use spirv_std::{scalar::Scalar, vector::Vector, vector::VectorOrScalar}; /// HACK(shesp). Rust doesn't allow us to declare regular (tuple-)structs containing `bool` members /// as `#[repl(simd)]`. 
But we need this for `spirv_std::arch::any()` and `spirv_std::arch::all()` @@ -12,6 +13,13 @@ use spirv_std::{scalar::Scalar, vector::Vector}; /// it (for now at least) #[repr(simd)] struct Vec2(T, T); +unsafe impl VectorOrScalar for Vec2 { + type Scalar = T; + const DIM: NonZeroUsize = match NonZeroUsize::new(2) { + None => panic!(), + Some(n) => n, + }; +} unsafe impl Vector for Vec2 {} impl Default for Vec2 { diff --git a/tests/ui/arch/subgroup/subgroup_ballot.rs b/tests/ui/arch/subgroup/subgroup_ballot.rs new file mode 100644 index 0000000000..aaa0142037 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_ballot.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_ballot::subgroup_ballot + +use spirv_std::spirv; + +unsafe fn subgroup_ballot(predicate: bool) -> bool { + let ballot = spirv_std::arch::subgroup_ballot(predicate); + spirv_std::arch::subgroup_inverse_ballot(ballot) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_ballot(true); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_ballot.stderr b/tests/ui/arch/subgroup/subgroup_ballot.stderr new file mode 100644 index 0000000000..13676166e7 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_ballot.stderr @@ -0,0 +1,10 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 379 8 +%7 = OpGroupNonUniformBallot %8 %9 %4 +OpLine %6 415 8 +%10 = OpGroupNonUniformInverseBallot %2 %9 %7 +OpNoLine +OpReturnValue %10 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_ballot_bit_count.rs b/tests/ui/arch/subgroup/subgroup_ballot_bit_count.rs new file mode 100644 index 0000000000..166fdabcbc --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_ballot_bit_count.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C 
target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_ballot_bit_count::subgroup_ballot_bit_count + +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_ballot_bit_count(ballot: SubgroupMask) -> u32 { + spirv_std::arch::subgroup_ballot_bit_count::<{ GroupOperation::Reduce as u32 }>(ballot) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_ballot_bit_count(spirv_std::arch::subgroup_ballot(true)); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_ballot_bit_count.stderr b/tests/ui/arch/subgroup/subgroup_ballot_bit_count.stderr new file mode 100644 index 0000000000..319877327e --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_ballot_bit_count.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %5 +%6 = OpLabel +OpLine %7 491 8 +%8 = OpGroupNonUniformBallotBitCount %2 %9 Reduce %4 +OpNoLine +OpReturnValue %8 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_broadcast_first.rs b/tests/ui/arch/subgroup/subgroup_broadcast_first.rs new file mode 100644 index 0000000000..c2544101c2 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_broadcast_first.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_broadcast_first::subgroup_broadcast_first + +use glam::Vec3; +use spirv_std::spirv; + +unsafe fn subgroup_broadcast_first(vec: Vec3) -> Vec3 { + spirv_std::arch::subgroup_broadcast_first::(vec) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_broadcast_first(Vec3::new(1., 2., 3.)); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_broadcast_first.stderr b/tests/ui/arch/subgroup/subgroup_broadcast_first.stderr new file mode 100644 index 0000000000..84f784d58e --- /dev/null +++ 
b/tests/ui/arch/subgroup/subgroup_broadcast_first.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 346 8 +%7 = OpGroupNonUniformBroadcastFirst %2 %8 %4 +OpNoLine +OpReturnValue %7 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_builtins.rs b/tests/ui/arch/subgroup/subgroup_builtins.rs new file mode 100644 index 0000000000..7524fa7b5f --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_builtins.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model + +use spirv_std::arch::SubgroupMask; +use spirv_std::spirv; + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main( + #[spirv(subgroup_id)] subgroup_id: u32, + #[spirv(subgroup_local_invocation_id)] subgroup_local_invocation_id: u32, + #[spirv(subgroup_eq_mask)] subgroup_eq_mask: SubgroupMask, + #[spirv(subgroup_ge_mask)] subgroup_ge_mask: SubgroupMask, + #[spirv(subgroup_gt_mask)] subgroup_gt_mask: SubgroupMask, + #[spirv(subgroup_le_mask)] subgroup_le_mask: SubgroupMask, + #[spirv(subgroup_lt_mask)] subgroup_lt_mask: SubgroupMask, +) { +} diff --git a/tests/ui/arch/subgroup/subgroup_elect.rs b/tests/ui/arch/subgroup/subgroup_elect.rs new file mode 100644 index 0000000000..4aee376c6b --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_elect.rs @@ -0,0 +1,16 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_elect::subgroup_elect + +use spirv_std::spirv; + +unsafe fn subgroup_elect() -> bool { + spirv_std::arch::subgroup_elect() +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_elect(); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_elect.stderr b/tests/ui/arch/subgroup/subgroup_elect.stderr new file mode 100644 index 0000000000..73bf0b2778 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_elect.stderr @@ -0,0 +1,7 @@ +%1 = OpFunction %2 
None %3 +%4 = OpLabel +OpLine %5 181 8 +%6 = OpGroupNonUniformElect %2 %7 +OpNoLine +OpReturnValue %6 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_i_add_clustered.rs b/tests/ui/arch/subgroup/subgroup_i_add_clustered.rs new file mode 100644 index 0000000000..f4cc511461 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_clustered.rs @@ -0,0 +1,18 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+GroupNonUniformClustered,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_i_add_clustered::subgroup_i_add_clustered + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_i_add_clustered(value: u32) -> u32 { + spirv_std::arch::subgroup_i_add_clustered::<8, _>(value) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_i_add_clustered(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_i_add_clustered.stderr b/tests/ui/arch/subgroup/subgroup_i_add_clustered.stderr new file mode 100644 index 0000000000..f52c1c0632 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_clustered.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 782 8 +%7 = OpGroupNonUniformIAdd %2 %8 ClusteredReduce %4 %9 +OpNoLine +OpReturnValue %7 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.rs b/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.rs new file mode 100644 index 0000000000..a10d0b3682 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.rs @@ -0,0 +1,18 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_i_add_exclusive_scan::subgroup_i_add_exclusive_scan + +use 
glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_i_add_exclusive_scan(value: u32) -> u32 { + spirv_std::arch::subgroup_i_add::<{ GroupOperation::ExclusiveScan as u32 }, _>(value) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_i_add_exclusive_scan(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.stderr b/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.stderr new file mode 100644 index 0000000000..bf7dd9f2b9 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 741 8 +%7 = OpGroupNonUniformIAdd %2 %8 ExclusiveScan %4 +OpNoLine +OpReturnValue %7 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.rs b/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.rs new file mode 100644 index 0000000000..ba823eac10 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.rs @@ -0,0 +1,18 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_i_add_inclusive_scan::subgroup_i_add_inclusive_scan + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_i_add_inclusive_scan(value: u32) -> u32 { + spirv_std::arch::subgroup_i_add::<{ GroupOperation::InclusiveScan as u32 }, _>(value) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_i_add_inclusive_scan(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.stderr b/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.stderr new file mode 100644 
index 0000000000..cb69054815 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 741 8 +%7 = OpGroupNonUniformIAdd %2 %8 InclusiveScan %4 +OpNoLine +OpReturnValue %7 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_i_add_reduce.rs b/tests/ui/arch/subgroup/subgroup_i_add_reduce.rs new file mode 100644 index 0000000000..4a8a42dbf6 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_reduce.rs @@ -0,0 +1,18 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_i_add_reduce::subgroup_i_add_reduce + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_i_add_reduce(value: u32) -> u32 { + spirv_std::arch::subgroup_i_add::<{ GroupOperation::Reduce as u32 }, _>(value) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_i_add_reduce(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_i_add_reduce.stderr b/tests/ui/arch/subgroup/subgroup_i_add_reduce.stderr new file mode 100644 index 0000000000..6501d5ce1d --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_reduce.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 741 8 +%7 = OpGroupNonUniformIAdd %2 %8 Reduce %4 +OpNoLine +OpReturnValue %7 +OpFunctionEnd