diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index bede79610a..834976f83b 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -53,12 +53,13 @@ use crate::{ use features::FeaturesManager; use std::{ cmp::Ordering, - fmt, - fmt::{Error as FmtError, Write}, + fmt::{self, Error as FmtError, Write}, mem, }; use thiserror::Error; +use super::zero_init; + /// Contains the features related code and the features querying method mod features; /// Contains a constant with a slice of all the reserved keywords RESERVED_KEYWORDS @@ -1685,10 +1686,10 @@ impl<'a, W: Write> Writer<'a, W> { // Close the parentheses and open braces to start the function body writeln!(self.out, ") {{")?; - if self.options.zero_initialize_workgroup_memory - && ctx.ty.is_compute_entry_point(self.module) - { - self.write_workgroup_variables_initialization(&ctx)?; + if self.options.zero_initialize_workgroup_memory { + if let Some(workgroup_size) = ctx.ty.compute_entry_point_workgroup_size(self.module) { + self.write_workgroup_variables_initialization(&ctx, workgroup_size)?; + } } // Compose the function arguments from globals, in case of an entry point. @@ -1780,31 +1781,80 @@ impl<'a, W: Write> Writer<'a, W> { fn write_workgroup_variables_initialization( &mut self, ctx: &back::FunctionCtx, + workgroup_size: [u32; 3], ) -> BackendResult { - let mut vars = self + let vars = self .module .global_variables .iter() .filter(|&(handle, var)| { !ctx.info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup - }) - .peekable(); - - if vars.peek().is_some() { - let level = back::Level(1); + }); + let zero_init_res = + zero_init::zero_init(&self.module, vars, workgroup_size.into_iter().product()); + if zero_init_res.is_empty() { + return Ok(()); + } - writeln!(self.out, "{level}if (gl_LocalInvocationID == uvec3(0u)) {{")?; + let mut level = back::Level(1); + let mut remainder = None; - for (handle, var) in vars { - let name = &self.names[&NameKey::GlobalVariable(handle)]; - write!(self.out, "{}{} = ", level.next(), name)?; - self.write_zero_init_value(var.ty)?; - writeln!(self.out, ";")?; + for (handle, init) in zero_init_res { + match init { + zero_init::ZeroInitKind::LocalPlusIndex { + index, + if_less_than, + } => { + if if_less_than != remainder { + let Some(if_less_than) = if_less_than else { + panic!("Got decrementing index") + }; + remainder = Some(if_less_than); + writeln!( + self.out, + "{level}if (gl_LocalInvocationIndex < {if_less_than}u) {{" + )?; + level = level.next(); + } + let var = &self.module.global_variables[handle]; + let base_type = match &self.module.types[var.ty].inner { + TypeInner::Array { base, .. 
} => base, + _ => unreachable!(), + }; + let name = &self.names[&NameKey::GlobalVariable(handle)]; + if let Some(index) = index { + write!( + self.out, + "{}{}[gl_LocalInvocationIndex + {index}u] = ", + level, name + )?; + } else { + write!(self.out, "{}{}[gl_LocalInvocationIndex] = ", level, name)?; + } + self.write_zero_init_value(*base_type)?; + writeln!(self.out, ";")?; + } + zero_init::ZeroInitKind::NotArray => { + if remainder != Some(1) { + writeln!(self.out, "{level}if (gl_LocalInvocationIndex < 1u) {{")?; + level = level.next(); + remainder = Some(1); + } + let name = &self.names[&NameKey::GlobalVariable(handle)]; + write!(self.out, "{}{} = ", level.next(), name)?; + let var = &self.module.global_variables[handle]; + self.write_zero_init_value(var.ty)?; + writeln!(self.out, ";")?; + } } - - writeln!(self.out, "{level}}}")?; - self.write_barrier(crate::Barrier::WORK_GROUP, level)?; } + // Close all opened brackets + for level in (1..level.0).rev() { + writeln!(self.out, "{}}}", back::Level(level))?; + } + level = back::Level(1); + + self.write_barrier(crate::Barrier::WORK_GROUP, level)?; Ok(()) } diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index d4c6097eb3..85f349bbd8 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -4,7 +4,7 @@ use super::{ BackendResult, Error, Options, }; use crate::{ - back, + back::{self, zero_init}, proc::{self, NameKey}, valid, Handle, Module, ScalarKind, ShaderStage, TypeInner, }; @@ -1113,8 +1113,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { // Write function name write!(self.out, " {name}(")?; - let need_workgroup_variables_initialization = - self.need_workgroup_variables_initialization(func_ctx, module); + let workgroup_size_for_initialization = + self.workgroup_size_for_variables_initialization(func_ctx, module); // Write function arguments for non entry point functions match func_ctx.ty { @@ -1169,11 +1169,11 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { } } - if need_workgroup_variables_initialization { + if workgroup_size_for_initialization.is_some() { if !func.arguments.is_empty() { write!(self.out, ", ")?; } - write!(self.out, "uint3 __local_invocation_id : SV_GroupThreadID")?; + write!(self.out, "uint __local_invocation_index : SV_GroupIndex")?; } } } @@ -1197,8 +1197,8 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { writeln!(self.out)?; writeln!(self.out, "{{")?; - if need_workgroup_variables_initialization { - self.write_workgroup_variables_initialization(func_ctx, module)?; + if let Some(workgroup_size) = workgroup_size_for_initialization { + self.write_workgroup_variables_initialization(func_ctx, module, workgroup_size)?; } if let back::FunctionType::EntryPoint(index) = func_ctx.ty { @@ -1249,43 +1249,95 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { Ok(()) } - fn need_workgroup_variables_initialization( + fn workgroup_size_for_variables_initialization( &mut self, func_ctx: &back::FunctionCtx, module: &Module, - ) -> bool { - self.options.zero_initialize_workgroup_memory - && func_ctx.ty.is_compute_entry_point(module) + ) -> Option<[u32; 3]> { + (self.options.zero_initialize_workgroup_memory && module.global_variables.iter().any(|(handle, var)| { !func_ctx.info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup - }) + })) + .then_some(|| ()) + .and(func_ctx.ty.compute_entry_point_workgroup_size(module)) } fn write_workgroup_variables_initialization( &mut self, func_ctx: &back::FunctionCtx, module: &Module, + workgroup_size: [u32; 3], ) -> BackendResult { 
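+        // Illustrative sketch of what this emits, mirroring the updated test expectations later in this diff (assuming `numthreads(4, 1, 1)` and a `groupshared int arr_i32_[128]`): each invocation stores `arr_i32_[__local_invocation_index + 0u/4u/.../124u] = (int)0;`, any remaining non-array workgroup variables are zeroed under `if (__local_invocation_index < 1u) { ... }`, and the block ends with `GroupMemoryBarrierWithGroupSync();`.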
- let level = back::Level(1); - - writeln!( - self.out, - "{level}if (all(__local_invocation_id == uint3(0u, 0u, 0u))) {{" - )?; - let vars = module.global_variables.iter().filter(|&(handle, var)| { !func_ctx.info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup }); - for (handle, var) in vars { - let name = &self.names[&NameKey::GlobalVariable(handle)]; - write!(self.out, "{}{} = ", level.next(), name)?; - self.write_default_init(module, var.ty)?; - writeln!(self.out, ";")?; + let zero_init_res = + zero_init::zero_init(&module, vars, workgroup_size.into_iter().product()); + if zero_init_res.is_empty() { + return Ok(()); + } + + let mut level = back::Level(1); + let mut remainder = None; + + for (handle, init) in zero_init_res { + match init { + zero_init::ZeroInitKind::LocalPlusIndex { + index, + if_less_than, + } => { + if if_less_than != remainder { + let Some(if_less_than) = if_less_than else { + panic!("Got decrementing index") + }; + remainder = Some(if_less_than); + writeln!( + self.out, + "{level}if (__local_invocation_index < {if_less_than}u) {{" + )?; + level = level.next(); + } + let var = &module.global_variables[handle]; + let base_type = match &module.types[var.ty].inner { + TypeInner::Array { base, .. } => base, + _ => unreachable!(), + }; + let name = &self.names[&NameKey::GlobalVariable(handle)]; + if let Some(index) = index { + write!( + self.out, + "{}{}[__local_invocation_index + {index}u] = ", + level, name + )?; + } else { + write!(self.out, "{}{}[__local_invocation_index] = ", level, name)?; + } + self.write_default_init(module, *base_type)?; + writeln!(self.out, ";")?; + } + zero_init::ZeroInitKind::NotArray => { + if remainder != Some(1) { + writeln!(self.out, "{level}if (__local_invocation_index < 1u) {{")?; + level = level.next(); + remainder = Some(1); + } + let name = &self.names[&NameKey::GlobalVariable(handle)]; + write!(self.out, "{}{} = ", level.next(), name)?; + let var = &module.global_variables[handle]; + self.write_default_init(module, var.ty)?; + writeln!(self.out, ";")?; + } + } + } + // Close all opened brackets + for level in (1..level.0).rev() { + writeln!(self.out, "{}}}", back::Level(level))?; } + level = back::Level(1); - writeln!(self.out, "{level}}}")?; - self.write_barrier(crate::Barrier::WORK_GROUP, level) + self.write_barrier(crate::Barrier::WORK_GROUP, level)?; + Ok(()) } /// Helper method used to write statements diff --git a/naga/src/back/mod.rs b/naga/src/back/mod.rs index 0c9c5e4761..fbca0fa9ab 100644 --- a/naga/src/back/mod.rs +++ b/naga/src/back/mod.rs @@ -24,6 +24,14 @@ pub mod wgsl; ))] pub mod pipeline_constants; +#[cfg(any( + feature = "hlsl-out", + feature = "msl-out", + feature = "spv-out", + feature = "glsl-out" +))] +pub(crate) mod zero_init; + /// Names of vector components. pub const COMPONENTS: &[char] = &['x', 'y', 'z', 'w']; /// Indent for backends. @@ -86,12 +94,14 @@ pub enum FunctionType { impl FunctionType { /// Returns true if the function is an entry point for a compute shader. 
- pub fn is_compute_entry_point(&self, module: &crate::Module) -> bool { + pub fn compute_entry_point_workgroup_size(&self, module: &crate::Module) -> Option<[u32; 3]> { match *self { FunctionType::EntryPoint(index) => { - module.entry_points[index as usize].stage == crate::ShaderStage::Compute + let entry_point = &module.entry_points[index as usize]; + (entry_point.stage == crate::ShaderStage::Compute) + .then_some(entry_point.workgroup_size) } - FunctionType::Function(_) => false, + FunctionType::Function(_) => None, } } } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index cf96fa59b4..d53ef852b4 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -8,15 +8,19 @@ use super::{ }; use crate::{ arena::{Handle, UniqueArena}, - back::spv::BindingInfo, + back::{spv::BindingInfo, zero_init::zero_init}, proc::{Alignment, TypeResolution}, valid::{FunctionInfo, ModuleInfo}, }; use spirv::Word; -use std::collections::hash_map::Entry; +use std::{ + collections::{hash_map::Entry, HashMap}, + num::NonZeroU32, +}; struct FunctionInterface<'a> { varying_ids: &'a mut Vec<Word>, + workgroup_size: [u32; 3], stage: crate::ShaderStage, } @@ -262,6 +266,24 @@ impl Writer { self.get_type_id(local_type.into()) } + pub(super) fn get_uint_pointer_type_id(&mut self, class: spirv::StorageClass) -> Word { + let lookup_type = LookupType::Local(LocalType::Value { + vector_size: None, + scalar: crate::Scalar::U32, + pointer_space: Some(class), + }); + if let Some(&id) = self.lookup_type.get(&lookup_type) { + id + } else { + let id = self.id_gen.next(); + let ty_id = self.get_uint_type_id(); + let instruction = Instruction::type_pointer(id, class, ty_id); + instruction.to_words(&mut self.logical_layout.declarations); + self.lookup_type.insert(lookup_type, id); + id + } + } + pub(super) fn get_float_pointer_type_id(&mut self, class: spirv::StorageClass) -> Word { let lookup_type = LookupType::Local(LocalType::Value { vector_size: None, @@ -338,7 +360,7 @@ impl Writer { results: Vec::new(), }; - let mut local_invocation_id = None; + let mut local_invocation_index = None; let mut parameter_type_ids = Vec::with_capacity(ir_function.arguments.len()); for argument in ir_function.arguments.iter() { @@ -371,8 +393,8 @@ impl Writer { .body .push(Instruction::load(argument_type_id, id, varying_id, None)); - if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { - local_invocation_id = Some(id); + if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationIndex) { + local_invocation_index = Some(id); } id @@ -401,7 +423,7 @@ impl Writer { constituent_ids.push(id); if binding == &crate::Binding::BuiltIn(crate::BuiltIn::GlobalInvocationId) { - local_invocation_id = Some(id); + local_invocation_index = Some(id); } } prelude.body.push(Instruction::composite_construct( @@ -690,7 +712,7 @@ impl Writer { next_id, ir_module, info, - local_invocation_id, + local_invocation_index, interface, context.function, ), @@ -752,6 +774,7 @@ impl Writer { Some(FunctionInterface { varying_ids: &mut interface_ids, stage: entry_point.stage, + workgroup_size: entry_point.workgroup_size, }), debug_info, )?; @@ -1325,41 +1348,32 @@ entry_id: Word, ir_module: &crate::Module, info: &FunctionInfo, - local_invocation_id: Option<Word>, + local_invocation_index: Option<Word>, interface: &mut FunctionInterface, function: &mut Function, ) -> Option<Word> { - let body = ir_module - .global_variables - .iter() - .filter(|&(handle, var)| { - !info[handle].is_empty() && var.space == 
crate::AddressSpace::WorkGroup - }) - .map(|(handle, var)| { - // It's safe to use `var_id` here, not `access_id`, because only - // variables in the `Uniform` and `StorageBuffer` address spaces - // get wrapped, and we're initializing `WorkGroup` variables. - let var_id = self.global_variables[handle.index()].var_id; - let var_type_id = self.get_type_id(LookupType::Handle(var.ty)); - let init_word = self.get_constant_null(var_type_id); - Instruction::store(var_id, init_word, None) - }) - .collect::<Vec<_>>(); + let variables = ir_module.global_variables.iter().filter(|&(handle, var)| { + !info[handle].is_empty() && var.space == crate::AddressSpace::WorkGroup + }); - if body.is_empty() { + let variables = zero_init( + ir_module, + variables, + interface.workgroup_size.into_iter().product(), + ); + if variables.is_empty() { return None; } - - let uint3_type_id = self.get_uint3_type_id(); - let mut pre_if_block = Block::new(entry_id); - let local_invocation_id = if let Some(local_invocation_id) = local_invocation_id { - local_invocation_id + let uint_type_id = self.get_uint_type_id(); + + let local_invocation_index = if let Some(local_invocation_index) = local_invocation_index { + local_invocation_index } else { let varying_id = self.id_gen.next(); let class = spirv::StorageClass::Input; - let pointer_type_id = self.get_uint3_pointer_type_id(class); + let pointer_type_id = self.get_uint_pointer_type_id(class); Instruction::variable(pointer_type_id, varying_id, class, None) .to_words(&mut self.logical_layout.declarations); @@ -1367,64 +1381,248 @@ self.decorate( varying_id, spirv::Decoration::BuiltIn, - &[spirv::BuiltIn::LocalInvocationId as u32], + &[spirv::BuiltIn::LocalInvocationIndex as u32], ); interface.varying_ids.push(varying_id); let id = self.id_gen.next(); pre_if_block .body - .push(Instruction::load(uint3_type_id, id, varying_id, None)); + .push(Instruction::load(uint_type_id, id, varying_id, None)); id }; + let barrier_block_id = self.id_gen.next(); + + let block_id = self.id_gen.next(); + function.consume(pre_if_block, Instruction::branch(block_id)); + + let remainder = None; + let mut additions = HashMap::<u32, Word>::new(); + self.handle_first_zero_init( + &mut additions, + &variables, + remainder, + block_id, + barrier_block_id, + local_invocation_index, + ir_module, + function, + ); - let zero_id = self.get_constant_null(uint3_type_id); - let bool3_type_id = self.get_bool3_type_id(); - - let eq_id = self.id_gen.next(); - pre_if_block.body.push(Instruction::binary( - spirv::Op::IEqual, - bool3_type_id, - eq_id, - local_invocation_id, - zero_id, - )); - - let condition_id = self.id_gen.next(); - let bool_type_id = self.get_bool_type_id(); - pre_if_block.body.push(Instruction::relational( - spirv::Op::All, - bool_type_id, - condition_id, - eq_id, - )); + let mut barrier_block = Block::new(barrier_block_id); + self.write_barrier(crate::Barrier::WORK_GROUP, &mut barrier_block); - let merge_id = self.id_gen.next(); - pre_if_block.body.push(Instruction::selection_merge( - merge_id, - spirv::SelectionControl::NONE, - )); - - let accept_id = self.id_gen.next(); - function.consume( - pre_if_block, - Instruction::branch_conditional(condition_id, accept_id, merge_id), - ); + let next_id = self.id_gen.next(); + function.consume(barrier_block, Instruction::branch(next_id)); + Some(next_id) + } - let accept_block = Block { - label_id: accept_id, - body, + fn handle_first_zero_init( + &mut self, + additions: &mut HashMap<u32, Word>, + variables: &[( + Handle<crate::GlobalVariable>, + crate::back::zero_init::ZeroInitKind, + )], 
+ current_remainder: Option<u32>, + block_id: Word, + exit_block: Word, + local_invocation_index: Word, + ir_module: &crate::Module, + function: &mut Function, + ) { + let block = Block::new(block_id); + let Some(((handle, kind), remainder)) = variables.split_first() else { + function.consume(block, Instruction::branch(exit_block)); + return; }; - function.consume(accept_block, Instruction::branch(merge_id)); - let mut post_if_block = Block::new(merge_id); + match kind { + crate::back::zero_init::ZeroInitKind::LocalPlusIndex { + index, + if_less_than, + } => { + self.reduce_remainder_then( + current_remainder, + *if_less_than, + block, + local_invocation_index, + function, + additions, + ir_module, + remainder, + exit_block, + |this, + block: &mut Block, + additions: &mut HashMap<u32, Word>, + ir_module: &crate::Module| { + this.zero_init_array_member( + block, + handle, + *index, + additions, + local_invocation_index, + ir_module, + ) + }, + ); + } + crate::back::zero_init::ZeroInitKind::NotArray => { + self.reduce_remainder_then( + current_remainder, + Some(1), + block, + local_invocation_index, + function, + additions, + ir_module, + remainder, + exit_block, + |this, + block: &mut Block, + _: &mut HashMap<u32, Word>, + ir_module: &crate::Module| { + // It's safe to use `var_id` here, not `access_id`, because only + // variables in the `Uniform` and `StorageBuffer` address spaces + // get wrapped, and we're initializing `WorkGroup` variables. + let var_id = this.global_variables[handle.index()].var_id; + let var_type_id = this.get_type_id(LookupType::Handle( + ir_module.global_variables[*handle].ty, + )); + let init_word = this.get_constant_null(var_type_id); + block.body.push(Instruction::store(var_id, init_word, None)) + }, + ); + } + } + } - self.write_barrier(crate::Barrier::WORK_GROUP, &mut post_if_block); + fn reduce_remainder_then( + &mut self, + current_remainder: Option<u32>, + target_remainder: Option<u32>, + mut consuming: Block, + local_invocation_index: u32, + function: &mut Function, + additions: &mut HashMap<u32, Word>, + ir_module: &crate::Module, + remainder: &[( + Handle<crate::GlobalVariable>, + crate::back::zero_init::ZeroInitKind, + )], + exit_block: u32, + f: impl FnOnce(&mut Self, &mut Block, &mut HashMap<u32, Word>, &crate::Module), + ) { + if current_remainder != target_remainder { + if let Some(target_remainder) = target_remainder { + let less_than_remainder = self.id_gen.next(); + let constant_id = self.get_constant_scalar(crate::Literal::U32(target_remainder)); + let bool_type_id = self.get_bool_type_id(); + consuming.body.push(Instruction::binary( + spirv::Op::ULessThan, + bool_type_id, + less_than_remainder, + local_invocation_index, + constant_id, + )); - let next_id = self.id_gen.next(); - function.consume(post_if_block, Instruction::branch(next_id)); - Some(next_id) + let merge_id = self.id_gen.next(); + consuming.body.push(Instruction::selection_merge( + merge_id, + spirv::SelectionControl::NONE, + )); + let accept_id = self.id_gen.next(); + function.consume( + consuming, + Instruction::branch_conditional(less_than_remainder, accept_id, merge_id), + ); + let mut accept_block = Block::new(accept_id); + f(&mut *self, &mut accept_block, additions, ir_module); + + let next_id = self.id_gen.next(); + function.consume(accept_block, Instruction::branch(next_id)); + self.handle_first_zero_init( + additions, + remainder, + Some(target_remainder), + next_id, + merge_id, + local_invocation_index, + ir_module, + function, + ); + let block = Block::new(merge_id); + function.consume(block, Instruction::branch(exit_block)); + return; + } else { + 
panic!( + "Went from having a remainder to not wanting one. Would otherwise fail to zero some items" + ); + } + } else { + let next_id = self.id_gen.next(); + f(&mut *self, &mut consuming, additions, ir_module); + function.consume(consuming, Instruction::branch(next_id)); + + self.handle_first_zero_init( + additions, + remainder, + current_remainder, + next_id, + exit_block, + local_invocation_index, + ir_module, + function, + ); + } + } + + fn zero_init_array_member( + &mut self, + block: &mut Block, + handle: &Handle<crate::GlobalVariable>, + index: Option<NonZeroU32>, + additions: &mut HashMap<u32, Word>, + local_invocation_index: Word, + ir_module: &crate::Module, + ) { + let uint_type_id = self.get_uint_type_id(); + let index_id = if let Some(index) = index { + *additions.entry(index.get()).or_insert_with(|| { + let constant_id = self.get_constant_scalar(crate::Literal::U32(index.get())); + let index_id = self.id_gen.next(); + block.body.push(Instruction::binary( + spirv::Op::IAdd, + uint_type_id, + index_id, + local_invocation_index, + constant_id, + )); + index_id + }) + } else { + local_invocation_index + }; + let var_id = self.global_variables[handle.index()].var_id; + let handle = ir_module.global_variables[*handle].ty; + let crate::TypeInner::Array { base, .. } = ir_module.types[handle].inner else { + unreachable!("We know that the type is an array") + }; + let result_type_id = self.get_type_id(LookupType::Handle(base)); + let pointer_type = self.id_gen.next(); + let instruction = + Instruction::type_pointer(pointer_type, spirv::StorageClass::Workgroup, result_type_id); + instruction.to_words(&mut self.logical_layout.declarations); + let pointer_id = self.id_gen.next(); + block.body.push(Instruction::access_chain( + pointer_type, + pointer_id, + var_id, + &[index_id], + )); + let null = self.get_constant_null(result_type_id); + block.body.push(Instruction::store(pointer_id, null, None)); } /// Generate an `OpVariable` for one value in an [`EntryPoint`]'s IO interface. diff --git a/naga/src/back/zero_init.rs b/naga/src/back/zero_init.rs new file mode 100644 index 0000000000..2a25d9d3bd --- /dev/null +++ b/naga/src/back/zero_init.rs @@ -0,0 +1,124 @@ +use std::num::NonZeroU32; + +use crate::{GlobalVariable, Handle, Module, Type}; + +#[derive(Debug)] +pub(crate) enum ZeroInitKind { + LocalPlusIndex { + // The amount local_invocation_index should be multiplied by + // We could use this to implement a more cache-efficient zeroing for big arrays, + // i.e. multiply each by 2, then add 0, 1, etc + // so each thread handles items which are next to each other + // multiple: Option<NonZeroU32>, + /// The amount to be added to local_invocation_index + index: Option<NonZeroU32>, + /// The amount to compare `local_invocation_index` against; the store is only performed when the index is less than this value + if_less_than: Option<u32>, + }, + NotArray, +} + +/// A helper driver for implementing zero initialisation +/// +/// This is needed because of https://github.com/gfx-rs/wgpu/issues/4592. 
+/// That is, the previously used behaviour had significant compilation time costs +pub(crate) fn zero_init<'a>( + module: &'a Module, + variables: impl Iterator<Item = (Handle<GlobalVariable>, &'a GlobalVariable)>, + workgroup_length: u32, +) -> Vec<(Handle<GlobalVariable>, ZeroInitKind)> { + if workgroup_length == 0 { + // No need to zero initialise, as we won't get any values anyway + return vec![]; + } + let mut total_len = 0; + let mut workgroup_variables = variables + .map(|(handle, var)| { + debug_assert_eq!(var.space, crate::AddressSpace::WorkGroup); + let len = if workgroup_length == 1 { + // Treat workgroups of size one as not an array, because otherwise we'd output loads of statements for them + // TODO: Heuristics here? + None + } else { + array_len(module, var.ty) + }; + let item = len.map(|len| { + let multiples = len / workgroup_length; + let remainder = len % workgroup_length; + (multiples, remainder) + }); + + total_len += item + .map(|(multiples, remainder)| multiples + (remainder != 0) as u32) + .unwrap_or(1); + (handle, var, item) + }) + .collect::<Vec<_>>(); + + // Sort the biggest indices to the front, with the non-array items at the end + workgroup_variables.sort_by_key(|(_, _, len)| std::cmp::Reverse(*len)); + let mut results = Vec::with_capacity(total_len as usize); + + for &(handle, _, len) in &workgroup_variables { + if let Some((multiples, _)) = len { + // Consider 6 items, with workgroup length of 2 + // multiples is 3 + // We output: +0, +2, +4, which give correct maximum index of five for thread index 1 + for i in 0..multiples { + results.push(( + handle, + ZeroInitKind::LocalPlusIndex { + index: NonZeroU32::new(i * workgroup_length), + if_less_than: None, + }, + )) + } + } else { + break; + } + } + + for &(handle, _, len) in &workgroup_variables { + if let Some((multiples, remainder)) = len { + if remainder == 0 { + continue; + } + + // Consider 3 items, with workgroup length of 2 + // multiples is 1, remainder is 1 + // We output: +2, only if index is less than remainder (which is 1) + // i.e. that gets max index 2 in thread 0 + results.push(( + handle, + ZeroInitKind::LocalPlusIndex { + index: NonZeroU32::new(multiples * workgroup_length), + if_less_than: Some(remainder), + }, + )) + } else { + results.push((handle, ZeroInitKind::NotArray)); + break; + } + } + results +} + +fn array_len(module: &Module, ty: Handle<Type>) -> Option<u32> { + match &module.types[ty].inner { + crate::TypeInner::Array { + base: _base, size, .. 
+ } => match size { + crate::ArraySize::Constant(e) => { + // If e is small, and `base` is big, then we *could* + // split up parts of the base + // We choose not to do this, as it gets very complicated + return Some(e.get()); + } + crate::ArraySize::Dynamic => { + log::error!("Arrays in the workgroup address space can't be dynamically sized"); + } + }, + _ => (), + } + None +} diff --git a/naga/tests/out/glsl/atomicOps.cs_main.Compute.glsl b/naga/tests/out/glsl/atomicOps.cs_main.Compute.glsl index b69c5107ce..f2b77afe60 100644 --- a/naga/tests/out/glsl/atomicOps.cs_main.Compute.glsl +++ b/naga/tests/out/glsl/atomicOps.cs_main.Compute.glsl @@ -23,10 +23,9 @@ shared Struct workgroup_struct; void main() { - if (gl_LocalInvocationID == uvec3(0u)) { - workgroup_atomic_scalar = 0u; - workgroup_atomic_arr = int[2](0, 0); - workgroup_struct = Struct(0u, int[2](0, 0)); + workgroup_atomic_arr[gl_LocalInvocationIndex] = 0; + if (gl_LocalInvocationIndex < 1u) { + workgroup_atomic_scalar = 0u; } memoryBarrierShared(); barrier(); diff --git a/naga/tests/out/glsl/globals.main.Compute.glsl b/naga/tests/out/glsl/globals.main.Compute.glsl index b7ef8bd295..eacdec4394 100644 --- a/naga/tests/out/glsl/globals.main.Compute.glsl +++ b/naga/tests/out/glsl/globals.main.Compute.glsl @@ -52,9 +52,8 @@ void test_msl_packed_vec3_() { } void main() { - if (gl_LocalInvocationID == uvec3(0u)) { - wg = float[10](0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); - at_1 = 0u; + if (gl_LocalInvocationIndex < 1u) { + wg = float[10](0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); } memoryBarrierShared(); barrier(); diff --git a/naga/tests/out/glsl/workgroup-uniform-load.test_workgroupUniformLoad.Compute.glsl b/naga/tests/out/glsl/workgroup-uniform-load.test_workgroupUniformLoad.Compute.glsl index 6315309c99..98b3b51875 100644 --- a/naga/tests/out/glsl/workgroup-uniform-load.test_workgroupUniformLoad.Compute.glsl +++ b/naga/tests/out/glsl/workgroup-uniform-load.test_workgroupUniformLoad.Compute.glsl @@ -11,9 +11,38 @@ shared int arr_i32_[128]; void main() { - if (gl_LocalInvocationID == uvec3(0u)) { - arr_i32_ = int[128](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - } + arr_i32_[gl_LocalInvocationIndex] = 0; + arr_i32_[gl_LocalInvocationIndex + 4u] = 0; + arr_i32_[gl_LocalInvocationIndex + 8u] = 0; + arr_i32_[gl_LocalInvocationIndex + 12u] = 0; + arr_i32_[gl_LocalInvocationIndex + 16u] = 0; + arr_i32_[gl_LocalInvocationIndex + 20u] = 0; + arr_i32_[gl_LocalInvocationIndex + 24u] = 0; + arr_i32_[gl_LocalInvocationIndex + 28u] = 0; + arr_i32_[gl_LocalInvocationIndex + 32u] = 0; + arr_i32_[gl_LocalInvocationIndex + 36u] = 0; + arr_i32_[gl_LocalInvocationIndex + 40u] = 0; + arr_i32_[gl_LocalInvocationIndex + 44u] = 0; + arr_i32_[gl_LocalInvocationIndex + 48u] = 0; + arr_i32_[gl_LocalInvocationIndex + 52u] = 0; + arr_i32_[gl_LocalInvocationIndex + 56u] = 0; + arr_i32_[gl_LocalInvocationIndex + 60u] = 0; + arr_i32_[gl_LocalInvocationIndex + 64u] = 0; + arr_i32_[gl_LocalInvocationIndex + 68u] = 0; + arr_i32_[gl_LocalInvocationIndex + 72u] = 0; + arr_i32_[gl_LocalInvocationIndex + 76u] = 0; + arr_i32_[gl_LocalInvocationIndex + 80u] = 0; + arr_i32_[gl_LocalInvocationIndex + 84u] = 0; + 
arr_i32_[gl_LocalInvocationIndex + 88u] = 0; + arr_i32_[gl_LocalInvocationIndex + 92u] = 0; + arr_i32_[gl_LocalInvocationIndex + 96u] = 0; + arr_i32_[gl_LocalInvocationIndex + 100u] = 0; + arr_i32_[gl_LocalInvocationIndex + 104u] = 0; + arr_i32_[gl_LocalInvocationIndex + 108u] = 0; + arr_i32_[gl_LocalInvocationIndex + 112u] = 0; + arr_i32_[gl_LocalInvocationIndex + 116u] = 0; + arr_i32_[gl_LocalInvocationIndex + 120u] = 0; + arr_i32_[gl_LocalInvocationIndex + 124u] = 0; memoryBarrierShared(); barrier(); uvec3 workgroup_id = gl_WorkGroupID; diff --git a/naga/tests/out/glsl/workgroup-var-init.main.Compute.glsl b/naga/tests/out/glsl/workgroup-var-init.main.Compute.glsl index de136c1109..5a0572c038 100644 --- a/naga/tests/out/glsl/workgroup-var-init.main.Compute.glsl +++ b/naga/tests/out/glsl/workgroup-var-init.main.Compute.glsl @@ -16,8 +16,8 @@ layout(std430) buffer type_1_block_0Compute { uint _group_0_binding_0_cs[512]; } void main() { - if (gl_LocalInvocationID == uvec3(0u)) { - w_mem = WStruct(uint[512](0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), 0, int[8][8](int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0))); + if (gl_LocalInvocationIndex < 1u) { + w_mem = WStruct(uint[512](0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), 0, int[8][8](int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0), int[8](0, 0, 0, 0, 0, 0, 0, 0))); } memoryBarrierShared(); barrier(); diff --git a/naga/tests/out/hlsl/atomicOps.hlsl b/naga/tests/out/hlsl/atomicOps.hlsl index 640972a2fa..2d55b0a6d7 100644 --- a/naga/tests/out/hlsl/atomicOps.hlsl +++ b/naga/tests/out/hlsl/atomicOps.hlsl @@ -11,12 +11,11 @@ groupshared int workgroup_atomic_arr[2]; groupshared Struct workgroup_struct; [numthreads(2, 1, 1)] -void cs_main(uint3 id : SV_GroupThreadID, uint3 __local_invocation_id : SV_GroupThreadID) +void cs_main(uint3 id : SV_GroupThreadID, uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { - workgroup_atomic_scalar = (uint)0; - workgroup_atomic_arr = (int[2])0; - workgroup_struct = (Struct)0; + workgroup_atomic_arr[__local_invocation_index] = (int)0; + if (__local_invocation_index < 1u) { + workgroup_atomic_scalar = (uint)0; } GroupMemoryBarrierWithGroupSync(); storage_atomic_scalar.Store(0, asuint(1u)); diff --git a/naga/tests/out/hlsl/globals.hlsl b/naga/tests/out/hlsl/globals.hlsl index 55faf060d0..eee034f9c9 100644 --- a/naga/tests/out/hlsl/globals.hlsl +++ b/naga/tests/out/hlsl/globals.hlsl @@ -105,11 +105,10 @@ uint NagaBufferLength(ByteAddressBuffer buffer) } [numthreads(1, 1, 1)] -void main(uint3 __local_invocation_id : SV_GroupThreadID) +void main(uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) 
{ - wg = (float[10])0; - at_1 = (uint)0; + if (__local_invocation_index < 1u) { + wg = (float[10])0; } GroupMemoryBarrierWithGroupSync(); float Foo = 1.0; diff --git a/naga/tests/out/hlsl/interface.hlsl b/naga/tests/out/hlsl/interface.hlsl index bbf330d4d6..497a9da13e 100644 --- a/naga/tests/out/hlsl/interface.hlsl +++ b/naga/tests/out/hlsl/interface.hlsl @@ -75,10 +75,10 @@ FragmentOutput fragment(FragmentInput_fragment fragmentinput_fragment) } [numthreads(1, 1, 1)] -void compute(uint3 global_id : SV_DispatchThreadID, uint3 local_id : SV_GroupThreadID, uint local_index : SV_GroupIndex, uint3 wg_id : SV_GroupID, uint3 num_wgs : SV_GroupID, uint3 __local_invocation_id : SV_GroupThreadID) +void compute(uint3 global_id : SV_DispatchThreadID, uint3 local_id : SV_GroupThreadID, uint local_index : SV_GroupIndex, uint3 wg_id : SV_GroupID, uint3 num_wgs : SV_GroupID, uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { - output = (uint[1])0; + if (__local_invocation_index < 1u) { + output = (uint[1])0; } GroupMemoryBarrierWithGroupSync(); output[0] = ((((global_id.x + local_id.x) + local_index) + wg_id.x) + uint3(_NagaConstants.first_vertex, _NagaConstants.first_instance, _NagaConstants.other).x); diff --git a/naga/tests/out/hlsl/workgroup-uniform-load.hlsl b/naga/tests/out/hlsl/workgroup-uniform-load.hlsl index 663fe33649..65cc287c9c 100644 --- a/naga/tests/out/hlsl/workgroup-uniform-load.hlsl +++ b/naga/tests/out/hlsl/workgroup-uniform-load.hlsl @@ -3,11 +3,40 @@ static const uint SIZE = 128u; groupshared int arr_i32_[128]; [numthreads(4, 1, 1)] -void test_workgroupUniformLoad(uint3 workgroup_id : SV_GroupID, uint3 __local_invocation_id : SV_GroupThreadID) +void test_workgroupUniformLoad(uint3 workgroup_id : SV_GroupID, uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { - arr_i32_ = (int[128])0; - } + arr_i32_[__local_invocation_index] = (int)0; + arr_i32_[__local_invocation_index + 4u] = (int)0; + arr_i32_[__local_invocation_index + 8u] = (int)0; + arr_i32_[__local_invocation_index + 12u] = (int)0; + arr_i32_[__local_invocation_index + 16u] = (int)0; + arr_i32_[__local_invocation_index + 20u] = (int)0; + arr_i32_[__local_invocation_index + 24u] = (int)0; + arr_i32_[__local_invocation_index + 28u] = (int)0; + arr_i32_[__local_invocation_index + 32u] = (int)0; + arr_i32_[__local_invocation_index + 36u] = (int)0; + arr_i32_[__local_invocation_index + 40u] = (int)0; + arr_i32_[__local_invocation_index + 44u] = (int)0; + arr_i32_[__local_invocation_index + 48u] = (int)0; + arr_i32_[__local_invocation_index + 52u] = (int)0; + arr_i32_[__local_invocation_index + 56u] = (int)0; + arr_i32_[__local_invocation_index + 60u] = (int)0; + arr_i32_[__local_invocation_index + 64u] = (int)0; + arr_i32_[__local_invocation_index + 68u] = (int)0; + arr_i32_[__local_invocation_index + 72u] = (int)0; + arr_i32_[__local_invocation_index + 76u] = (int)0; + arr_i32_[__local_invocation_index + 80u] = (int)0; + arr_i32_[__local_invocation_index + 84u] = (int)0; + arr_i32_[__local_invocation_index + 88u] = (int)0; + arr_i32_[__local_invocation_index + 92u] = (int)0; + arr_i32_[__local_invocation_index + 96u] = (int)0; + arr_i32_[__local_invocation_index + 100u] = (int)0; + arr_i32_[__local_invocation_index + 104u] = (int)0; + arr_i32_[__local_invocation_index + 108u] = (int)0; + arr_i32_[__local_invocation_index + 112u] = (int)0; + arr_i32_[__local_invocation_index + 116u] = (int)0; + 
arr_i32_[__local_invocation_index + 120u] = (int)0; + arr_i32_[__local_invocation_index + 124u] = (int)0; GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync(); int _expr4 = arr_i32_[workgroup_id.x]; diff --git a/naga/tests/out/hlsl/workgroup-var-init.hlsl b/naga/tests/out/hlsl/workgroup-var-init.hlsl index e0bd73f8ff..e7bc6d0af9 100644 --- a/naga/tests/out/hlsl/workgroup-var-init.hlsl +++ b/naga/tests/out/hlsl/workgroup-var-init.hlsl @@ -8,10 +8,10 @@ groupshared WStruct w_mem; RWByteAddressBuffer output : register(u0); [numthreads(1, 1, 1)] -void main(uint3 __local_invocation_id : SV_GroupThreadID) +void main(uint __local_invocation_index : SV_GroupIndex) { - if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { - w_mem = (WStruct)0; + if (__local_invocation_index < 1u) { + w_mem = (WStruct)0; } GroupMemoryBarrierWithGroupSync(); uint _expr3[512] = w_mem.arr; diff --git a/naga/tests/out/spv/atomicOps.spvasm b/naga/tests/out/spv/atomicOps.spvasm index de4d687824..54dff3072a 100644 --- a/naga/tests/out/spv/atomicOps.spvasm +++ b/naga/tests/out/spv/atomicOps.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 189 +; Bound: 194 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %28 "cs_main" %25 +OpEntryPoint GLCompute %28 "cs_main" %25 %40 OpExecutionMode %28 LocalSize 2 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %7 0 Offset 0 @@ -24,6 +24,7 @@ OpDecorate %15 Binding 2 OpDecorate %16 Block OpMemberDecorate %16 0 Offset 0 OpDecorate %25 BuiltIn LocalInvocationId +OpDecorate %40 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -55,18 +56,18 @@ OpDecorate %25 BuiltIn LocalInvocationId %35 = OpTypePointer StorageBuffer %7 %37 = OpConstant %3 1 %38 = OpConstant %4 1 -%40 = OpConstantNull %3 -%41 = OpConstantNull %5 -%42 = OpConstantNull %7 -%43 = OpConstantNull %8 -%45 = OpTypeBool -%44 = OpTypeVector %45 3 -%50 = OpConstant %3 264 -%52 = OpConstant %3 64 -%53 = OpTypePointer StorageBuffer %4 -%57 = OpConstant %4 2 -%58 = OpConstant %3 256 -%59 = OpTypePointer Workgroup %4 +%41 = OpTypePointer Input %3 +%40 = OpVariable %41 Input +%46 = OpTypePointer Workgroup %4 +%48 = OpConstantNull %4 +%50 = OpTypeBool +%53 = OpConstantNull %3 +%55 = OpConstant %3 264 +%57 = OpConstant %3 64 +%58 = OpTypePointer StorageBuffer %4 +%62 = OpConstant %4 2 +%63 = OpConstant %3 256 +%64 = OpTypePointer Workgroup %4 %28 = OpFunction %2 None %29 %24 = OpLabel %27 = OpLoad %8 %25 @@ -75,166 +76,174 @@ OpDecorate %25 BuiltIn LocalInvocationId %36 = OpAccessChain %35 %15 %31 OpBranch %39 %39 = OpLabel -%46 = OpIEqual %44 %27 %43 -%47 = OpAll %45 %46 -OpSelectionMerge %48 None -OpBranchConditional %47 %49 %48 -%49 = OpLabel -OpStore %18 %40 -OpStore %20 %41 -OpStore %22 %42 -OpBranch %48 -%48 = OpLabel -OpControlBarrier %6 %6 %50 +%42 = OpLoad %3 %40 +OpBranch %44 +%44 = OpLabel +%47 = OpAccessChain %46 %20 %42 +OpStore %47 %48 +OpBranch %45 +%45 = OpLabel +%49 = OpULessThan %50 %42 %37 +OpSelectionMerge %51 None +OpBranchConditional %49 %52 %51 +%52 = OpLabel +OpStore %18 %53 +OpBranch %54 +%54 = OpLabel OpBranch %51 %51 = OpLabel -OpAtomicStore %32 %38 %52 %37 -%54 = OpAccessChain %53 %34 %37 -OpAtomicStore %54 %38 %52 %38 -%55 = OpAccessChain %30 %36 %31 -OpAtomicStore %55 %38 %52 %37 -%56 = OpAccessChain %53 %36 %37 %37 -OpAtomicStore %56 %38 %52 %38 -OpAtomicStore %18 %57 %58 %37 -%60 = OpAccessChain %59 %20 %37 
-OpAtomicStore %60 %57 %58 %38 -%61 = OpAccessChain %19 %22 %31 -OpAtomicStore %61 %57 %58 %37 -%62 = OpAccessChain %59 %22 %37 %37 -OpAtomicStore %62 %57 %58 %38 -OpControlBarrier %6 %6 %50 -%63 = OpAtomicLoad %3 %32 %38 %52 -%64 = OpAccessChain %53 %34 %37 -%65 = OpAtomicLoad %4 %64 %38 %52 -%66 = OpAccessChain %30 %36 %31 -%67 = OpAtomicLoad %3 %66 %38 %52 -%68 = OpAccessChain %53 %36 %37 %37 -%69 = OpAtomicLoad %4 %68 %38 %52 -%70 = OpAtomicLoad %3 %18 %57 %58 -%71 = OpAccessChain %59 %20 %37 -%72 = OpAtomicLoad %4 %71 %57 %58 -%73 = OpAccessChain %19 %22 %31 -%74 = OpAtomicLoad %3 %73 %57 %58 -%75 = OpAccessChain %59 %22 %37 %37 -%76 = OpAtomicLoad %4 %75 %57 %58 -OpControlBarrier %6 %6 %50 -%77 = OpAtomicIAdd %3 %32 %38 %52 %37 -%79 = OpAccessChain %53 %34 %37 -%78 = OpAtomicIAdd %4 %79 %38 %52 %38 -%81 = OpAccessChain %30 %36 %31 -%80 = OpAtomicIAdd %3 %81 %38 %52 %37 -%83 = OpAccessChain %53 %36 %37 %37 -%82 = OpAtomicIAdd %4 %83 %38 %52 %38 -%84 = OpAtomicIAdd %3 %18 %57 %58 %37 -%86 = OpAccessChain %59 %20 %37 -%85 = OpAtomicIAdd %4 %86 %57 %58 %38 -%88 = OpAccessChain %19 %22 %31 -%87 = OpAtomicIAdd %3 %88 %57 %58 %37 -%90 = OpAccessChain %59 %22 %37 %37 -%89 = OpAtomicIAdd %4 %90 %57 %58 %38 -OpControlBarrier %6 %6 %50 -%91 = OpAtomicISub %3 %32 %38 %52 %37 -%93 = OpAccessChain %53 %34 %37 -%92 = OpAtomicISub %4 %93 %38 %52 %38 -%95 = OpAccessChain %30 %36 %31 -%94 = OpAtomicISub %3 %95 %38 %52 %37 -%97 = OpAccessChain %53 %36 %37 %37 -%96 = OpAtomicISub %4 %97 %38 %52 %38 -%98 = OpAtomicISub %3 %18 %57 %58 %37 -%100 = OpAccessChain %59 %20 %37 -%99 = OpAtomicISub %4 %100 %57 %58 %38 -%102 = OpAccessChain %19 %22 %31 -%101 = OpAtomicISub %3 %102 %57 %58 %37 -%104 = OpAccessChain %59 %22 %37 %37 -%103 = OpAtomicISub %4 %104 %57 %58 %38 -OpControlBarrier %6 %6 %50 -%105 = OpAtomicUMax %3 %32 %38 %52 %37 -%107 = OpAccessChain %53 %34 %37 -%106 = OpAtomicSMax %4 %107 %38 %52 %38 -%109 = OpAccessChain %30 %36 %31 -%108 = OpAtomicUMax %3 %109 %38 %52 %37 -%111 = OpAccessChain %53 %36 %37 %37 -%110 = OpAtomicSMax %4 %111 %38 %52 %38 -%112 = OpAtomicUMax %3 %18 %57 %58 %37 -%114 = OpAccessChain %59 %20 %37 -%113 = OpAtomicSMax %4 %114 %57 %58 %38 -%116 = OpAccessChain %19 %22 %31 -%115 = OpAtomicUMax %3 %116 %57 %58 %37 -%118 = OpAccessChain %59 %22 %37 %37 -%117 = OpAtomicSMax %4 %118 %57 %58 %38 -OpControlBarrier %6 %6 %50 -%119 = OpAtomicUMin %3 %32 %38 %52 %37 -%121 = OpAccessChain %53 %34 %37 -%120 = OpAtomicSMin %4 %121 %38 %52 %38 -%123 = OpAccessChain %30 %36 %31 -%122 = OpAtomicUMin %3 %123 %38 %52 %37 -%125 = OpAccessChain %53 %36 %37 %37 -%124 = OpAtomicSMin %4 %125 %38 %52 %38 -%126 = OpAtomicUMin %3 %18 %57 %58 %37 -%128 = OpAccessChain %59 %20 %37 -%127 = OpAtomicSMin %4 %128 %57 %58 %38 -%130 = OpAccessChain %19 %22 %31 -%129 = OpAtomicUMin %3 %130 %57 %58 %37 -%132 = OpAccessChain %59 %22 %37 %37 -%131 = OpAtomicSMin %4 %132 %57 %58 %38 -OpControlBarrier %6 %6 %50 -%133 = OpAtomicAnd %3 %32 %38 %52 %37 -%135 = OpAccessChain %53 %34 %37 -%134 = OpAtomicAnd %4 %135 %38 %52 %38 -%137 = OpAccessChain %30 %36 %31 -%136 = OpAtomicAnd %3 %137 %38 %52 %37 -%139 = OpAccessChain %53 %36 %37 %37 -%138 = OpAtomicAnd %4 %139 %38 %52 %38 -%140 = OpAtomicAnd %3 %18 %57 %58 %37 -%142 = OpAccessChain %59 %20 %37 -%141 = OpAtomicAnd %4 %142 %57 %58 %38 -%144 = OpAccessChain %19 %22 %31 -%143 = OpAtomicAnd %3 %144 %57 %58 %37 -%146 = OpAccessChain %59 %22 %37 %37 -%145 = OpAtomicAnd %4 %146 %57 %58 %38 -OpControlBarrier %6 %6 %50 -%147 = OpAtomicOr %3 %32 %38 %52 %37 -%149 = OpAccessChain 
%53 %34 %37 -%148 = OpAtomicOr %4 %149 %38 %52 %38 -%151 = OpAccessChain %30 %36 %31 -%150 = OpAtomicOr %3 %151 %38 %52 %37 -%153 = OpAccessChain %53 %36 %37 %37 -%152 = OpAtomicOr %4 %153 %38 %52 %38 -%154 = OpAtomicOr %3 %18 %57 %58 %37 -%156 = OpAccessChain %59 %20 %37 -%155 = OpAtomicOr %4 %156 %57 %58 %38 -%158 = OpAccessChain %19 %22 %31 -%157 = OpAtomicOr %3 %158 %57 %58 %37 -%160 = OpAccessChain %59 %22 %37 %37 -%159 = OpAtomicOr %4 %160 %57 %58 %38 -OpControlBarrier %6 %6 %50 -%161 = OpAtomicXor %3 %32 %38 %52 %37 -%163 = OpAccessChain %53 %34 %37 -%162 = OpAtomicXor %4 %163 %38 %52 %38 -%165 = OpAccessChain %30 %36 %31 -%164 = OpAtomicXor %3 %165 %38 %52 %37 -%167 = OpAccessChain %53 %36 %37 %37 -%166 = OpAtomicXor %4 %167 %38 %52 %38 -%168 = OpAtomicXor %3 %18 %57 %58 %37 -%170 = OpAccessChain %59 %20 %37 -%169 = OpAtomicXor %4 %170 %57 %58 %38 -%172 = OpAccessChain %19 %22 %31 -%171 = OpAtomicXor %3 %172 %57 %58 %37 -%174 = OpAccessChain %59 %22 %37 %37 -%173 = OpAtomicXor %4 %174 %57 %58 %38 -%175 = OpAtomicExchange %3 %32 %38 %52 %37 -%177 = OpAccessChain %53 %34 %37 -%176 = OpAtomicExchange %4 %177 %38 %52 %38 -%179 = OpAccessChain %30 %36 %31 -%178 = OpAtomicExchange %3 %179 %38 %52 %37 -%181 = OpAccessChain %53 %36 %37 %37 -%180 = OpAtomicExchange %4 %181 %38 %52 %38 -%182 = OpAtomicExchange %3 %18 %57 %58 %37 -%184 = OpAccessChain %59 %20 %37 -%183 = OpAtomicExchange %4 %184 %57 %58 %38 -%186 = OpAccessChain %19 %22 %31 -%185 = OpAtomicExchange %3 %186 %57 %58 %37 -%188 = OpAccessChain %59 %22 %37 %37 -%187 = OpAtomicExchange %4 %188 %57 %58 %38 +OpBranch %43 +%43 = OpLabel +OpControlBarrier %6 %6 %55 +OpBranch %56 +%56 = OpLabel +OpAtomicStore %32 %38 %57 %37 +%59 = OpAccessChain %58 %34 %37 +OpAtomicStore %59 %38 %57 %38 +%60 = OpAccessChain %30 %36 %31 +OpAtomicStore %60 %38 %57 %37 +%61 = OpAccessChain %58 %36 %37 %37 +OpAtomicStore %61 %38 %57 %38 +OpAtomicStore %18 %62 %63 %37 +%65 = OpAccessChain %64 %20 %37 +OpAtomicStore %65 %62 %63 %38 +%66 = OpAccessChain %19 %22 %31 +OpAtomicStore %66 %62 %63 %37 +%67 = OpAccessChain %64 %22 %37 %37 +OpAtomicStore %67 %62 %63 %38 +OpControlBarrier %6 %6 %55 +%68 = OpAtomicLoad %3 %32 %38 %57 +%69 = OpAccessChain %58 %34 %37 +%70 = OpAtomicLoad %4 %69 %38 %57 +%71 = OpAccessChain %30 %36 %31 +%72 = OpAtomicLoad %3 %71 %38 %57 +%73 = OpAccessChain %58 %36 %37 %37 +%74 = OpAtomicLoad %4 %73 %38 %57 +%75 = OpAtomicLoad %3 %18 %62 %63 +%76 = OpAccessChain %64 %20 %37 +%77 = OpAtomicLoad %4 %76 %62 %63 +%78 = OpAccessChain %19 %22 %31 +%79 = OpAtomicLoad %3 %78 %62 %63 +%80 = OpAccessChain %64 %22 %37 %37 +%81 = OpAtomicLoad %4 %80 %62 %63 +OpControlBarrier %6 %6 %55 +%82 = OpAtomicIAdd %3 %32 %38 %57 %37 +%84 = OpAccessChain %58 %34 %37 +%83 = OpAtomicIAdd %4 %84 %38 %57 %38 +%86 = OpAccessChain %30 %36 %31 +%85 = OpAtomicIAdd %3 %86 %38 %57 %37 +%88 = OpAccessChain %58 %36 %37 %37 +%87 = OpAtomicIAdd %4 %88 %38 %57 %38 +%89 = OpAtomicIAdd %3 %18 %62 %63 %37 +%91 = OpAccessChain %64 %20 %37 +%90 = OpAtomicIAdd %4 %91 %62 %63 %38 +%93 = OpAccessChain %19 %22 %31 +%92 = OpAtomicIAdd %3 %93 %62 %63 %37 +%95 = OpAccessChain %64 %22 %37 %37 +%94 = OpAtomicIAdd %4 %95 %62 %63 %38 +OpControlBarrier %6 %6 %55 +%96 = OpAtomicISub %3 %32 %38 %57 %37 +%98 = OpAccessChain %58 %34 %37 +%97 = OpAtomicISub %4 %98 %38 %57 %38 +%100 = OpAccessChain %30 %36 %31 +%99 = OpAtomicISub %3 %100 %38 %57 %37 +%102 = OpAccessChain %58 %36 %37 %37 +%101 = OpAtomicISub %4 %102 %38 %57 %38 +%103 = OpAtomicISub %3 %18 %62 %63 %37 +%105 = OpAccessChain %64 %20 
%37 +%104 = OpAtomicISub %4 %105 %62 %63 %38 +%107 = OpAccessChain %19 %22 %31 +%106 = OpAtomicISub %3 %107 %62 %63 %37 +%109 = OpAccessChain %64 %22 %37 %37 +%108 = OpAtomicISub %4 %109 %62 %63 %38 +OpControlBarrier %6 %6 %55 +%110 = OpAtomicUMax %3 %32 %38 %57 %37 +%112 = OpAccessChain %58 %34 %37 +%111 = OpAtomicSMax %4 %112 %38 %57 %38 +%114 = OpAccessChain %30 %36 %31 +%113 = OpAtomicUMax %3 %114 %38 %57 %37 +%116 = OpAccessChain %58 %36 %37 %37 +%115 = OpAtomicSMax %4 %116 %38 %57 %38 +%117 = OpAtomicUMax %3 %18 %62 %63 %37 +%119 = OpAccessChain %64 %20 %37 +%118 = OpAtomicSMax %4 %119 %62 %63 %38 +%121 = OpAccessChain %19 %22 %31 +%120 = OpAtomicUMax %3 %121 %62 %63 %37 +%123 = OpAccessChain %64 %22 %37 %37 +%122 = OpAtomicSMax %4 %123 %62 %63 %38 +OpControlBarrier %6 %6 %55 +%124 = OpAtomicUMin %3 %32 %38 %57 %37 +%126 = OpAccessChain %58 %34 %37 +%125 = OpAtomicSMin %4 %126 %38 %57 %38 +%128 = OpAccessChain %30 %36 %31 +%127 = OpAtomicUMin %3 %128 %38 %57 %37 +%130 = OpAccessChain %58 %36 %37 %37 +%129 = OpAtomicSMin %4 %130 %38 %57 %38 +%131 = OpAtomicUMin %3 %18 %62 %63 %37 +%133 = OpAccessChain %64 %20 %37 +%132 = OpAtomicSMin %4 %133 %62 %63 %38 +%135 = OpAccessChain %19 %22 %31 +%134 = OpAtomicUMin %3 %135 %62 %63 %37 +%137 = OpAccessChain %64 %22 %37 %37 +%136 = OpAtomicSMin %4 %137 %62 %63 %38 +OpControlBarrier %6 %6 %55 +%138 = OpAtomicAnd %3 %32 %38 %57 %37 +%140 = OpAccessChain %58 %34 %37 +%139 = OpAtomicAnd %4 %140 %38 %57 %38 +%142 = OpAccessChain %30 %36 %31 +%141 = OpAtomicAnd %3 %142 %38 %57 %37 +%144 = OpAccessChain %58 %36 %37 %37 +%143 = OpAtomicAnd %4 %144 %38 %57 %38 +%145 = OpAtomicAnd %3 %18 %62 %63 %37 +%147 = OpAccessChain %64 %20 %37 +%146 = OpAtomicAnd %4 %147 %62 %63 %38 +%149 = OpAccessChain %19 %22 %31 +%148 = OpAtomicAnd %3 %149 %62 %63 %37 +%151 = OpAccessChain %64 %22 %37 %37 +%150 = OpAtomicAnd %4 %151 %62 %63 %38 +OpControlBarrier %6 %6 %55 +%152 = OpAtomicOr %3 %32 %38 %57 %37 +%154 = OpAccessChain %58 %34 %37 +%153 = OpAtomicOr %4 %154 %38 %57 %38 +%156 = OpAccessChain %30 %36 %31 +%155 = OpAtomicOr %3 %156 %38 %57 %37 +%158 = OpAccessChain %58 %36 %37 %37 +%157 = OpAtomicOr %4 %158 %38 %57 %38 +%159 = OpAtomicOr %3 %18 %62 %63 %37 +%161 = OpAccessChain %64 %20 %37 +%160 = OpAtomicOr %4 %161 %62 %63 %38 +%163 = OpAccessChain %19 %22 %31 +%162 = OpAtomicOr %3 %163 %62 %63 %37 +%165 = OpAccessChain %64 %22 %37 %37 +%164 = OpAtomicOr %4 %165 %62 %63 %38 +OpControlBarrier %6 %6 %55 +%166 = OpAtomicXor %3 %32 %38 %57 %37 +%168 = OpAccessChain %58 %34 %37 +%167 = OpAtomicXor %4 %168 %38 %57 %38 +%170 = OpAccessChain %30 %36 %31 +%169 = OpAtomicXor %3 %170 %38 %57 %37 +%172 = OpAccessChain %58 %36 %37 %37 +%171 = OpAtomicXor %4 %172 %38 %57 %38 +%173 = OpAtomicXor %3 %18 %62 %63 %37 +%175 = OpAccessChain %64 %20 %37 +%174 = OpAtomicXor %4 %175 %62 %63 %38 +%177 = OpAccessChain %19 %22 %31 +%176 = OpAtomicXor %3 %177 %62 %63 %37 +%179 = OpAccessChain %64 %22 %37 %37 +%178 = OpAtomicXor %4 %179 %62 %63 %38 +%180 = OpAtomicExchange %3 %32 %38 %57 %37 +%182 = OpAccessChain %58 %34 %37 +%181 = OpAtomicExchange %4 %182 %38 %57 %38 +%184 = OpAccessChain %30 %36 %31 +%183 = OpAtomicExchange %3 %184 %38 %57 %37 +%186 = OpAccessChain %58 %36 %37 %37 +%185 = OpAtomicExchange %4 %186 %38 %57 %38 +%187 = OpAtomicExchange %3 %18 %62 %63 %37 +%189 = OpAccessChain %64 %20 %37 +%188 = OpAtomicExchange %4 %189 %62 %63 %38 +%191 = OpAccessChain %19 %22 %31 +%190 = OpAtomicExchange %3 %191 %62 %63 %37 +%193 = OpAccessChain %64 %22 %37 %37 +%192 = OpAtomicExchange %4 %193 
%62 %63 %38 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/globals.spvasm b/naga/tests/out/spv/globals.spvasm index 4aa6a10ad5..630434f29a 100644 --- a/naga/tests/out/spv/globals.spvasm +++ b/naga/tests/out/spv/globals.spvasm @@ -1,12 +1,12 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 174 +; Bound: 172 OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %93 "main" %116 +OpEntryPoint GLCompute %93 "main" %113 OpExecutionMode %93 LocalSize 1 1 1 OpDecorate %5 ArrayStride 4 OpMemberDecorate %9 0 Offset 0 @@ -48,7 +48,7 @@ OpDecorate %48 DescriptorSet 0 OpDecorate %48 Binding 7 OpDecorate %49 Block OpMemberDecorate %49 0 Offset 0 -OpDecorate %116 BuiltIn LocalInvocationId +OpDecorate %113 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeBool %4 = OpTypeFloat 32 @@ -120,32 +120,28 @@ OpDecorate %116 BuiltIn LocalInvocationId %107 = OpConstant %4 4.0 %109 = OpTypePointer Function %4 %111 = OpTypePointer Function %3 -%113 = OpConstantNull %5 -%114 = OpConstantNull %7 -%115 = OpTypeVector %7 3 -%117 = OpTypePointer Input %115 -%116 = OpVariable %117 Input -%119 = OpConstantNull %115 -%120 = OpTypeVector %3 3 -%125 = OpConstant %7 264 -%128 = OpTypePointer Workgroup %4 -%129 = OpTypePointer Uniform %21 -%130 = OpTypePointer Uniform %20 -%133 = OpTypePointer Uniform %17 -%134 = OpTypePointer Uniform %16 -%135 = OpTypePointer Uniform %12 -%140 = OpConstant %7 7 -%146 = OpConstant %7 6 -%148 = OpTypePointer StorageBuffer %10 -%149 = OpConstant %7 1 -%152 = OpConstant %7 5 -%154 = OpTypePointer Uniform %12 -%155 = OpTypePointer Uniform %4 -%156 = OpConstant %7 3 -%159 = OpConstant %7 4 -%161 = OpTypePointer StorageBuffer %4 -%172 = OpConstant %23 2 -%173 = OpConstant %7 256 +%114 = OpTypePointer Input %7 +%113 = OpVariable %114 Input +%119 = OpConstant %7 1 +%122 = OpConstantNull %5 +%124 = OpConstant %7 264 +%127 = OpTypePointer Workgroup %4 +%128 = OpTypePointer Uniform %21 +%129 = OpTypePointer Uniform %20 +%132 = OpTypePointer Uniform %17 +%133 = OpTypePointer Uniform %16 +%134 = OpTypePointer Uniform %12 +%139 = OpConstant %7 7 +%145 = OpConstant %7 6 +%147 = OpTypePointer StorageBuffer %10 +%150 = OpConstant %7 5 +%152 = OpTypePointer Uniform %12 +%153 = OpTypePointer Uniform %4 +%154 = OpConstant %7 3 +%157 = OpConstant %7 4 +%159 = OpTypePointer StorageBuffer %4 +%170 = OpConstant %23 2 +%171 = OpConstant %7 256 %53 = OpFunction %2 None %54 %52 = OpFunctionParameter %8 %51 = OpLabel @@ -197,56 +193,60 @@ OpFunctionEnd %106 = OpAccessChain %105 %48 %60 OpBranch %112 %112 = OpLabel -%118 = OpLoad %115 %116 -%121 = OpIEqual %120 %118 %119 -%122 = OpAll %3 %121 -OpSelectionMerge %123 None -OpBranchConditional %122 %124 %123 -%124 = OpLabel -OpStore %26 %113 -OpStore %28 %114 +%115 = OpLoad %7 %113 +OpBranch %117 +%117 = OpLabel +%118 = OpULessThan %3 %115 %119 +OpSelectionMerge %120 None +OpBranchConditional %118 %121 %120 +%121 = OpLabel +OpStore %26 %122 OpBranch %123 %123 = OpLabel -OpControlBarrier %18 %18 %125 -OpBranch %126 -%126 = OpLabel -%127 = OpFunctionCall %2 %57 -%131 = OpAccessChain %130 %106 %60 %60 -%132 = OpLoad %20 %131 -%136 = OpAccessChain %135 %104 %60 %60 %60 -%137 = OpLoad %12 %136 -%138 = OpMatrixTimesVector %10 %132 %137 -%139 = OpCompositeExtract %4 %138 0 -%141 = OpAccessChain %128 %26 %140 -OpStore %141 %139 -%142 = OpLoad %15 %102 -%143 = OpLoad %8 %100 -%144 = OpMatrixTimesVector %10 %142 %143 -%145 = 
OpCompositeExtract %4 %144 0 -%147 = OpAccessChain %128 %26 %146 -OpStore %147 %145 -%150 = OpAccessChain %73 %96 %149 %149 -%151 = OpLoad %4 %150 -%153 = OpAccessChain %128 %26 %152 -OpStore %153 %151 -%157 = OpAccessChain %155 %98 %60 %156 -%158 = OpLoad %4 %157 -%160 = OpAccessChain %128 %26 %159 -OpStore %160 %158 -%162 = OpAccessChain %161 %94 %149 -%163 = OpLoad %4 %162 -%164 = OpAccessChain %128 %26 %156 -OpStore %164 %163 -%165 = OpAccessChain %73 %94 %60 %60 -%166 = OpLoad %4 %165 -%167 = OpAccessChain %128 %26 %18 -OpStore %167 %166 -%168 = OpAccessChain %161 %94 %149 -OpStore %168 %107 -%169 = OpArrayLength %7 %33 0 -%170 = OpConvertUToF %4 %169 -%171 = OpAccessChain %128 %26 %149 -OpStore %171 %170 -OpAtomicStore %28 %172 %173 %18 +OpBranch %120 +%120 = OpLabel +OpBranch %116 +%116 = OpLabel +OpControlBarrier %18 %18 %124 +OpBranch %125 +%125 = OpLabel +%126 = OpFunctionCall %2 %57 +%130 = OpAccessChain %129 %106 %60 %60 +%131 = OpLoad %20 %130 +%135 = OpAccessChain %134 %104 %60 %60 %60 +%136 = OpLoad %12 %135 +%137 = OpMatrixTimesVector %10 %131 %136 +%138 = OpCompositeExtract %4 %137 0 +%140 = OpAccessChain %127 %26 %139 +OpStore %140 %138 +%141 = OpLoad %15 %102 +%142 = OpLoad %8 %100 +%143 = OpMatrixTimesVector %10 %141 %142 +%144 = OpCompositeExtract %4 %143 0 +%146 = OpAccessChain %127 %26 %145 +OpStore %146 %144 +%148 = OpAccessChain %73 %96 %119 %119 +%149 = OpLoad %4 %148 +%151 = OpAccessChain %127 %26 %150 +OpStore %151 %149 +%155 = OpAccessChain %153 %98 %60 %154 +%156 = OpLoad %4 %155 +%158 = OpAccessChain %127 %26 %157 +OpStore %158 %156 +%160 = OpAccessChain %159 %94 %119 +%161 = OpLoad %4 %160 +%162 = OpAccessChain %127 %26 %154 +OpStore %162 %161 +%163 = OpAccessChain %73 %94 %60 %60 +%164 = OpLoad %4 %163 +%165 = OpAccessChain %127 %26 %18 +OpStore %165 %164 +%166 = OpAccessChain %159 %94 %119 +OpStore %166 %107 +%167 = OpArrayLength %7 %33 0 +%168 = OpConvertUToF %4 %167 +%169 = OpAccessChain %127 %26 %119 +OpStore %169 %168 +OpAtomicStore %28 %170 %171 %18 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/interface.compute.spvasm b/naga/tests/out/spv/interface.compute.spvasm index 73f6ecb2c2..174a253459 100644 --- a/naga/tests/out/spv/interface.compute.spvasm +++ b/naga/tests/out/spv/interface.compute.spvasm @@ -42,9 +42,7 @@ OpDecorate %27 BuiltIn NumWorkgroups %25 = OpVariable %18 Input %27 = OpVariable %18 Input %30 = OpTypeFunction %2 -%32 = OpConstantNull %9 -%33 = OpConstantNull %11 -%34 = OpTypeVector %8 3 +%37 = OpConstantNull %9 %39 = OpConstant %6 2 %40 = OpConstant %6 264 %42 = OpTypePointer Workgroup %6 @@ -58,14 +56,19 @@ OpDecorate %27 BuiltIn NumWorkgroups %28 = OpLoad %11 %27 OpBranch %31 %31 = OpLabel -%35 = OpIEqual %34 %21 %33 -%36 = OpAll %8 %35 -OpSelectionMerge %37 None -OpBranchConditional %36 %38 %37 +OpBranch %33 +%33 = OpLabel +%34 = OpULessThan %8 %24 %10 +OpSelectionMerge %35 None +OpBranchConditional %34 %36 %35 +%36 = OpLabel +OpStore %14 %37 +OpBranch %38 %38 = OpLabel -OpStore %14 %32 -OpBranch %37 -%37 = OpLabel +OpBranch %35 +%35 = OpLabel +OpBranch %32 +%32 = OpLabel OpControlBarrier %39 %39 %40 OpBranch %41 %41 = OpLabel diff --git a/naga/tests/out/spv/overrides-atomicCompareExchangeWeak.f.spvasm b/naga/tests/out/spv/overrides-atomicCompareExchangeWeak.f.spvasm index 59c69ae1fc..ca72104b93 100644 --- a/naga/tests/out/spv/overrides-atomicCompareExchangeWeak.f.spvasm +++ b/naga/tests/out/spv/overrides-atomicCompareExchangeWeak.f.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.0 ; Generator: 
rspirv -; Bound: 33 +; Bound: 32 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %11 "f" %18 +OpEntryPoint GLCompute %11 "f" %16 OpExecutionMode %11 LocalSize 1 1 1 OpMemberDecorate %6 0 Offset 0 OpMemberDecorate %6 1 Offset 4 -OpDecorate %18 BuiltIn LocalInvocationId +OpDecorate %16 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 1 %4 = OpTypeInt 32 0 @@ -21,32 +21,34 @@ OpDecorate %18 BuiltIn LocalInvocationId %12 = OpTypeFunction %2 %13 = OpConstant %4 2 %14 = OpConstant %4 1 -%16 = OpConstantNull %4 -%17 = OpTypeVector %4 3 -%19 = OpTypePointer Input %17 -%18 = OpVariable %19 Input -%21 = OpConstantNull %17 -%22 = OpTypeVector %5 3 -%27 = OpConstant %4 264 -%30 = OpConstant %4 256 +%17 = OpTypePointer Input %4 +%16 = OpVariable %17 Input +%24 = OpConstantNull %4 +%26 = OpConstant %4 264 +%29 = OpConstant %4 256 %11 = OpFunction %2 None %12 %10 = OpLabel OpBranch %15 %15 = OpLabel -%20 = OpLoad %17 %18 -%23 = OpIEqual %22 %20 %21 -%24 = OpAll %5 %23 -OpSelectionMerge %25 None -OpBranchConditional %24 %26 %25 -%26 = OpLabel -OpStore %8 %16 +%18 = OpLoad %4 %16 +OpBranch %20 +%20 = OpLabel +%21 = OpULessThan %5 %18 %14 +OpSelectionMerge %22 None +OpBranchConditional %21 %23 %22 +%23 = OpLabel +OpStore %8 %24 OpBranch %25 %25 = OpLabel -OpControlBarrier %13 %13 %27 -OpBranch %28 -%28 = OpLabel -%31 = OpAtomicCompareExchange %4 %8 %7 %30 %30 %14 %13 -%32 = OpIEqual %5 %31 %13 -%29 = OpCompositeConstruct %6 %31 %32 +OpBranch %22 +%22 = OpLabel +OpBranch %19 +%19 = OpLabel +OpControlBarrier %13 %13 %26 +OpBranch %27 +%27 = OpLabel +%30 = OpAtomicCompareExchange %4 %8 %7 %29 %29 %14 %13 +%31 = OpIEqual %5 %30 %13 +%28 = OpCompositeConstruct %6 %30 %31 OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/workgroup-uniform-load.spvasm b/naga/tests/out/spv/workgroup-uniform-load.spvasm index 87f212a799..2def39499a 100644 --- a/naga/tests/out/spv/workgroup-uniform-load.spvasm +++ b/naga/tests/out/spv/workgroup-uniform-load.spvasm @@ -1,15 +1,15 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 40 +; Bound: 194 OpCapability Shader %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %14 "test_workgroupUniformLoad" %11 %19 +OpEntryPoint GLCompute %14 "test_workgroupUniformLoad" %11 %18 OpExecutionMode %14 LocalSize 4 1 1 OpDecorate %5 ArrayStride 4 OpDecorate %11 BuiltIn WorkgroupId -OpDecorate %19 BuiltIn LocalInvocationId +OpDecorate %18 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %4 = OpTypeInt 32 1 @@ -22,45 +22,261 @@ OpDecorate %19 BuiltIn LocalInvocationId %11 = OpVariable %12 Input %15 = OpTypeFunction %2 %16 = OpConstant %4 10 -%18 = OpConstantNull %5 -%20 = OpTypePointer Input %7 -%19 = OpVariable %20 Input -%22 = OpConstantNull %7 -%24 = OpTypeBool -%23 = OpTypeVector %24 3 -%29 = OpConstant %3 2 -%30 = OpConstant %3 264 -%33 = OpTypePointer Workgroup %4 +%19 = OpTypePointer Input %3 +%18 = OpVariable %19 Input +%24 = OpTypePointer Workgroup %4 +%26 = OpConstantNull %4 +%28 = OpConstant %3 4 +%30 = OpTypePointer Workgroup %4 +%33 = OpConstant %3 8 +%35 = OpTypePointer Workgroup %4 +%38 = OpConstant %3 12 +%40 = OpTypePointer Workgroup %4 +%43 = OpConstant %3 16 +%45 = OpTypePointer Workgroup %4 +%48 = OpConstant %3 20 +%50 = OpTypePointer Workgroup %4 +%53 = OpConstant %3 24 +%55 = OpTypePointer Workgroup %4 +%58 = OpConstant %3 28 +%60 = OpTypePointer Workgroup %4 +%63 = OpConstant %3 32 +%65 = OpTypePointer Workgroup 
%4 +%68 = OpConstant %3 36 +%70 = OpTypePointer Workgroup %4 +%73 = OpConstant %3 40 +%75 = OpTypePointer Workgroup %4 +%78 = OpConstant %3 44 +%80 = OpTypePointer Workgroup %4 +%83 = OpConstant %3 48 +%85 = OpTypePointer Workgroup %4 +%88 = OpConstant %3 52 +%90 = OpTypePointer Workgroup %4 +%93 = OpConstant %3 56 +%95 = OpTypePointer Workgroup %4 +%98 = OpConstant %3 60 +%100 = OpTypePointer Workgroup %4 +%103 = OpConstant %3 64 +%105 = OpTypePointer Workgroup %4 +%108 = OpConstant %3 68 +%110 = OpTypePointer Workgroup %4 +%113 = OpConstant %3 72 +%115 = OpTypePointer Workgroup %4 +%118 = OpConstant %3 76 +%120 = OpTypePointer Workgroup %4 +%123 = OpConstant %3 80 +%125 = OpTypePointer Workgroup %4 +%128 = OpConstant %3 84 +%130 = OpTypePointer Workgroup %4 +%133 = OpConstant %3 88 +%135 = OpTypePointer Workgroup %4 +%138 = OpConstant %3 92 +%140 = OpTypePointer Workgroup %4 +%143 = OpConstant %3 96 +%145 = OpTypePointer Workgroup %4 +%148 = OpConstant %3 100 +%150 = OpTypePointer Workgroup %4 +%153 = OpConstant %3 104 +%155 = OpTypePointer Workgroup %4 +%158 = OpConstant %3 108 +%160 = OpTypePointer Workgroup %4 +%163 = OpConstant %3 112 +%165 = OpTypePointer Workgroup %4 +%168 = OpConstant %3 116 +%170 = OpTypePointer Workgroup %4 +%173 = OpConstant %3 120 +%175 = OpTypePointer Workgroup %4 +%178 = OpConstant %3 124 +%180 = OpTypePointer Workgroup %4 +%182 = OpConstant %3 2 +%183 = OpConstant %3 264 +%186 = OpTypePointer Workgroup %4 +%189 = OpTypeBool %14 = OpFunction %2 None %15 %10 = OpLabel %13 = OpLoad %7 %11 OpBranch %17 %17 = OpLabel -%21 = OpLoad %7 %19 -%25 = OpIEqual %23 %21 %22 -%26 = OpAll %24 %25 -OpSelectionMerge %27 None -OpBranchConditional %26 %28 %27 -%28 = OpLabel -OpStore %8 %18 +%20 = OpLoad %3 %18 +OpBranch %22 +%22 = OpLabel +%25 = OpAccessChain %24 %8 %20 +OpStore %25 %26 +OpBranch %23 +%23 = OpLabel +%29 = OpIAdd %3 %20 %28 +%31 = OpAccessChain %30 %8 %29 +OpStore %31 %26 OpBranch %27 %27 = OpLabel -OpControlBarrier %29 %29 %30 -OpBranch %31 -%31 = OpLabel -%32 = OpCompositeExtract %3 %13 0 -OpControlBarrier %29 %29 %30 -%34 = OpAccessChain %33 %8 %32 -%35 = OpLoad %4 %34 -OpControlBarrier %29 %29 %30 -%36 = OpSGreaterThan %24 %35 %16 -OpSelectionMerge %37 None -OpBranchConditional %36 %38 %39 -%38 = OpLabel -OpControlBarrier %29 %29 %30 +%34 = OpIAdd %3 %20 %33 +%36 = OpAccessChain %35 %8 %34 +OpStore %36 %26 +OpBranch %32 +%32 = OpLabel +%39 = OpIAdd %3 %20 %38 +%41 = OpAccessChain %40 %8 %39 +OpStore %41 %26 +OpBranch %37 +%37 = OpLabel +%44 = OpIAdd %3 %20 %43 +%46 = OpAccessChain %45 %8 %44 +OpStore %46 %26 +OpBranch %42 +%42 = OpLabel +%49 = OpIAdd %3 %20 %48 +%51 = OpAccessChain %50 %8 %49 +OpStore %51 %26 +OpBranch %47 +%47 = OpLabel +%54 = OpIAdd %3 %20 %53 +%56 = OpAccessChain %55 %8 %54 +OpStore %56 %26 +OpBranch %52 +%52 = OpLabel +%59 = OpIAdd %3 %20 %58 +%61 = OpAccessChain %60 %8 %59 +OpStore %61 %26 +OpBranch %57 +%57 = OpLabel +%64 = OpIAdd %3 %20 %63 +%66 = OpAccessChain %65 %8 %64 +OpStore %66 %26 +OpBranch %62 +%62 = OpLabel +%69 = OpIAdd %3 %20 %68 +%71 = OpAccessChain %70 %8 %69 +OpStore %71 %26 +OpBranch %67 +%67 = OpLabel +%74 = OpIAdd %3 %20 %73 +%76 = OpAccessChain %75 %8 %74 +OpStore %76 %26 +OpBranch %72 +%72 = OpLabel +%79 = OpIAdd %3 %20 %78 +%81 = OpAccessChain %80 %8 %79 +OpStore %81 %26 +OpBranch %77 +%77 = OpLabel +%84 = OpIAdd %3 %20 %83 +%86 = OpAccessChain %85 %8 %84 +OpStore %86 %26 +OpBranch %82 +%82 = OpLabel +%89 = OpIAdd %3 %20 %88 +%91 = OpAccessChain %90 %8 %89 +OpStore %91 %26 +OpBranch %87 +%87 = OpLabel +%94 = 
OpIAdd %3 %20 %93 +%96 = OpAccessChain %95 %8 %94 +OpStore %96 %26 +OpBranch %92 +%92 = OpLabel +%99 = OpIAdd %3 %20 %98 +%101 = OpAccessChain %100 %8 %99 +OpStore %101 %26 +OpBranch %97 +%97 = OpLabel +%104 = OpIAdd %3 %20 %103 +%106 = OpAccessChain %105 %8 %104 +OpStore %106 %26 +OpBranch %102 +%102 = OpLabel +%109 = OpIAdd %3 %20 %108 +%111 = OpAccessChain %110 %8 %109 +OpStore %111 %26 +OpBranch %107 +%107 = OpLabel +%114 = OpIAdd %3 %20 %113 +%116 = OpAccessChain %115 %8 %114 +OpStore %116 %26 +OpBranch %112 +%112 = OpLabel +%119 = OpIAdd %3 %20 %118 +%121 = OpAccessChain %120 %8 %119 +OpStore %121 %26 +OpBranch %117 +%117 = OpLabel +%124 = OpIAdd %3 %20 %123 +%126 = OpAccessChain %125 %8 %124 +OpStore %126 %26 +OpBranch %122 +%122 = OpLabel +%129 = OpIAdd %3 %20 %128 +%131 = OpAccessChain %130 %8 %129 +OpStore %131 %26 +OpBranch %127 +%127 = OpLabel +%134 = OpIAdd %3 %20 %133 +%136 = OpAccessChain %135 %8 %134 +OpStore %136 %26 +OpBranch %132 +%132 = OpLabel +%139 = OpIAdd %3 %20 %138 +%141 = OpAccessChain %140 %8 %139 +OpStore %141 %26 +OpBranch %137 +%137 = OpLabel +%144 = OpIAdd %3 %20 %143 +%146 = OpAccessChain %145 %8 %144 +OpStore %146 %26 +OpBranch %142 +%142 = OpLabel +%149 = OpIAdd %3 %20 %148 +%151 = OpAccessChain %150 %8 %149 +OpStore %151 %26 +OpBranch %147 +%147 = OpLabel +%154 = OpIAdd %3 %20 %153 +%156 = OpAccessChain %155 %8 %154 +OpStore %156 %26 +OpBranch %152 +%152 = OpLabel +%159 = OpIAdd %3 %20 %158 +%161 = OpAccessChain %160 %8 %159 +OpStore %161 %26 +OpBranch %157 +%157 = OpLabel +%164 = OpIAdd %3 %20 %163 +%166 = OpAccessChain %165 %8 %164 +OpStore %166 %26 +OpBranch %162 +%162 = OpLabel +%169 = OpIAdd %3 %20 %168 +%171 = OpAccessChain %170 %8 %169 +OpStore %171 %26 +OpBranch %167 +%167 = OpLabel +%174 = OpIAdd %3 %20 %173 +%176 = OpAccessChain %175 %8 %174 +OpStore %176 %26 +OpBranch %172 +%172 = OpLabel +%179 = OpIAdd %3 %20 %178 +%181 = OpAccessChain %180 %8 %179 +OpStore %181 %26 +OpBranch %177 +%177 = OpLabel +OpBranch %21 +%21 = OpLabel +OpControlBarrier %182 %182 %183 +OpBranch %184 +%184 = OpLabel +%185 = OpCompositeExtract %3 %13 0 +OpControlBarrier %182 %182 %183 +%187 = OpAccessChain %186 %8 %185 +%188 = OpLoad %4 %187 +OpControlBarrier %182 %182 %183 +%190 = OpSGreaterThan %189 %188 %16 +OpSelectionMerge %191 None +OpBranchConditional %190 %192 %193 +%192 = OpLabel +OpControlBarrier %182 %182 %183 OpReturn -%39 = OpLabel +%193 = OpLabel OpReturn -%37 = OpLabel +%191 = OpLabel OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/workgroup-var-init.spvasm b/naga/tests/out/spv/workgroup-var-init.spvasm index 399f63855c..ebd0c90d1c 100644 --- a/naga/tests/out/spv/workgroup-var-init.spvasm +++ b/naga/tests/out/spv/workgroup-var-init.spvasm @@ -6,7 +6,7 @@ OpCapability Shader OpExtension "SPV_KHR_storage_buffer_storage_class" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 -OpEntryPoint GLCompute %17 "main" %25 +OpEntryPoint GLCompute %17 "main" %23 OpExecutionMode %17 LocalSize 1 1 1 OpMemberName %10 0 "arr" OpMemberName %10 1 "atom" @@ -25,7 +25,7 @@ OpDecorate %13 DescriptorSet 0 OpDecorate %13 Binding 0 OpDecorate %14 Block OpMemberDecorate %14 0 Offset 0 -OpDecorate %25 BuiltIn LocalInvocationId +OpDecorate %23 BuiltIn LocalInvocationIndex %2 = OpTypeVoid %3 = OpTypeInt 32 0 %5 = OpConstant %3 512 @@ -43,13 +43,11 @@ OpDecorate %25 BuiltIn LocalInvocationId %18 = OpTypeFunction %2 %19 = OpTypePointer StorageBuffer %4 %20 = OpConstant %3 0 -%23 = OpConstantNull %10 -%24 = OpTypeVector %3 3 -%26 = 
OpTypePointer Input %24
-%25 = OpVariable %26 Input
-%28 = OpConstantNull %24
+%24 = OpTypePointer Input %3
+%23 = OpVariable %24 Input
+%29 = OpConstant %3 1
 %30 = OpTypeBool
-%29 = OpTypeVector %30 3
+%33 = OpConstantNull %10
 %35 = OpConstant %3 2
 %36 = OpConstant %3 264
 %38 = OpTypePointer Workgroup %4
@@ -58,15 +56,20 @@ OpDecorate %25 BuiltIn LocalInvocationId
 %21 = OpAccessChain %19 %13 %20
 OpBranch %22
 %22 = OpLabel
-%27 = OpLoad %24 %25
-%31 = OpIEqual %29 %27 %28
-%32 = OpAll %30 %31
-OpSelectionMerge %33 None
-OpBranchConditional %32 %34 %33
+%25 = OpLoad %3 %23
+OpBranch %27
+%27 = OpLabel
+%28 = OpULessThan %30 %25 %29
+OpSelectionMerge %31 None
+OpBranchConditional %28 %32 %31
+%32 = OpLabel
+OpStore %11 %33
+OpBranch %34
 %34 = OpLabel
-OpStore %11 %23
-OpBranch %33
-%33 = OpLabel
+OpBranch %31
+%31 = OpLabel
+OpBranch %26
+%26 = OpLabel
 OpControlBarrier %35 %35 %36
 OpBranch %37
 %37 = OpLabel
diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs
index bb93c690e8..5efd776bdb 100644
--- a/tests/tests/shader/mod.rs
+++ b/tests/tests/shader/mod.rs
@@ -18,6 +18,7 @@ use wgpu_test::TestingContext;
 pub mod numeric_builtins;
 pub mod struct_layout;
 pub mod zero_init_workgroup_mem;
+mod zero_init_workgroup_mem_threads;
 
 #[derive(Clone, Copy, PartialEq)]
 enum InputStorageType {
diff --git a/tests/tests/shader/zero_init_workgroup_mem.wgsl b/tests/tests/shader/zero_init_workgroup_mem.wgsl
index 638b89edab..2a1aca9287 100644
--- a/tests/tests/shader/zero_init_workgroup_mem.wgsl
+++ b/tests/tests/shader/zero_init_workgroup_mem.wgsl
@@ -7,6 +7,8 @@ struct WStruct {
 var<workgroup> w_mem: WStruct;
 
+var<workgroup> w_mem_array: array<u32, array_size>;
+
 @group(0) @binding(0)
 var<storage, read_write> output: array<u32>;
@@ -15,6 +17,7 @@ fn read(@builtin(workgroup_id) wgid: vec3<u32>, @builtin(num_workgroups) num_wor
     var is_zero = true;
     for(var i = 0u; i < array_size; i++) {
         is_zero &= w_mem.arr[i] == 0u;
+        is_zero &= w_mem_array[i] == 0u;
     }
     is_zero &= atomicLoad(&w_mem.atom) == 0u;
@@ -26,6 +29,7 @@ fn write() {
     for(var i = 0u; i < array_size; i++) {
         w_mem.arr[i] = i;
+        w_mem_array[i] = i;
     }
     atomicStore(&w_mem.atom, 3u);
 }
diff --git a/tests/tests/shader/zero_init_workgroup_mem_threads.rs b/tests/tests/shader/zero_init_workgroup_mem_threads.rs
new file mode 100644
index 0000000000..c5516bf2f0
--- /dev/null
+++ b/tests/tests/shader/zero_init_workgroup_mem_threads.rs
@@ -0,0 +1,171 @@
+use std::num::NonZeroU64;
+
+use wgpu::{
+    include_wgsl, Backends, BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor,
+    BindGroupLayoutEntry, BindingResource, BindingType, BufferBinding, BufferBindingType,
+    BufferDescriptor, BufferUsages, CommandEncoderDescriptor, ComputePassDescriptor,
+    ComputePipelineDescriptor, DownlevelFlags, Limits, Maintain, MapMode, PipelineLayoutDescriptor,
+    ShaderStages,
+};
+
+use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
+
+#[gpu_test]
+static ZERO_INIT_WORKGROUP_MEMORY_THREADS: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
+            .limits(Limits::downlevel_defaults())
+            // remove once we get to https://github.com/gfx-rs/wgpu/issues/3193
+            .skip(FailureCase {
+                backends: Some(Backends::DX12),
+                vendor: Some(5140),
+                adapter: Some("Microsoft Basic Render Driver"),
+                ..FailureCase::default()
+            }),
+    )
+    .run_async(|ctx| async move {
+        let bgl = ctx
+            .device
+            .create_bind_group_layout(&BindGroupLayoutDescriptor {
+                label: None,
+                entries: &[BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: ShaderStages::COMPUTE,
+                    ty: BindingType::Buffer {
+                        ty: BufferBindingType::Storage { read_only: false },
+                        has_dynamic_offset: true,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                }],
+            });
+
+        let output_buffer = ctx.device.create_buffer(&BufferDescriptor {
+            label: Some("output buffer"),
+            size: BUFFER_SIZE,
+            usage: BufferUsages::COPY_DST | BufferUsages::COPY_SRC | BufferUsages::STORAGE,
+            mapped_at_creation: false,
+        });
+
+        let mapping_buffer = ctx.device.create_buffer(&BufferDescriptor {
+            label: Some("mapping buffer"),
+            size: BUFFER_SIZE,
+            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
+            mapped_at_creation: false,
+        });
+
+        let bg = ctx.device.create_bind_group(&BindGroupDescriptor {
+            label: None,
+            layout: &bgl,
+            entries: &[BindGroupEntry {
+                binding: 0,
+                resource: BindingResource::Buffer(BufferBinding {
+                    buffer: &output_buffer,
+                    offset: 0,
+                    size: Some(NonZeroU64::new(BUFFER_BINDING_SIZE as u64).unwrap()),
+                }),
+            }],
+        });
+
+        let pll = ctx
+            .device
+            .create_pipeline_layout(&PipelineLayoutDescriptor {
+                label: None,
+                bind_group_layouts: &[&bgl],
+                push_constant_ranges: &[],
+            });
+
+        let sm = ctx
+            .device
+            .create_shader_module(include_wgsl!("zero_init_workgroup_mem_threads.wgsl"));
+
+        let pipeline_read = ctx
+            .device
+            .create_compute_pipeline(&ComputePipelineDescriptor {
+                label: Some("pipeline read"),
+                layout: Some(&pll),
+                module: &sm,
+                entry_point: "read_",
+                constants: &Default::default(),
+            });
+
+        let pipeline_write = ctx
+            .device
+            .create_compute_pipeline(&ComputePipelineDescriptor {
+                label: Some("pipeline write"),
+                layout: None,
+                module: &sm,
+                entry_point: "write_",
+                constants: &Default::default(),
+            });
+
+        // -- Initializing data --
+
+        let output_pre_init_data = vec![1; OUTPUT_ARRAY_SIZE as usize];
+        ctx.queue.write_buffer(
+            &output_buffer,
+            0,
+            bytemuck::cast_slice(&output_pre_init_data),
+        );
+
+        // -- Run test --
+
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&CommandEncoderDescriptor::default());
+
+        let mut cpass = encoder.begin_compute_pass(&ComputePassDescriptor::default());
+
+        cpass.set_pipeline(&pipeline_write);
+        for _ in 0..NR_OF_DISPATCHES {
+            cpass.dispatch_workgroups(DISPATCH_SIZE.0, DISPATCH_SIZE.1, DISPATCH_SIZE.2);
+        }
+
+        cpass.set_pipeline(&pipeline_read);
+        for i in 0..NR_OF_DISPATCHES {
+            cpass.set_bind_group(0, &bg, &[i * BUFFER_BINDING_SIZE]);
+            cpass.dispatch_workgroups(DISPATCH_SIZE.0, DISPATCH_SIZE.1, DISPATCH_SIZE.2);
+        }
+        drop(cpass);
+
+        // -- Pulldown data --
+
+        encoder.copy_buffer_to_buffer(&output_buffer, 0, &mapping_buffer, 0, BUFFER_SIZE);
+
+        ctx.queue.submit(Some(encoder.finish()));
+
+        mapping_buffer.slice(..).map_async(MapMode::Read, |_| ());
+        ctx.async_poll(Maintain::wait()).await.panic_on_timeout();
+
+        let mapped = mapping_buffer.slice(..).get_mapped_range();
+
+        let typed: &[u32] = bytemuck::cast_slice(&mapped);
+
+        // -- Check results --
+
+        let num_dispatches_failed = typed.iter().filter(|&&res| res != 0).count();
+        let ratio = (num_dispatches_failed as f32 / OUTPUT_ARRAY_SIZE as f32) * 100.;
+
+        assert!(
+            num_dispatches_failed == 0,
+            "Zero-initialization of workgroup memory failed ({ratio:.0}% of dispatches failed)."
+        );
+
+        drop(mapped);
+        mapping_buffer.unmap();
+    });
+
+const DISPATCH_SIZE: (u32, u32, u32) = (64, 64, 64);
+const TOTAL_WORK_GROUPS: u32 = DISPATCH_SIZE.0 * DISPATCH_SIZE.1 * DISPATCH_SIZE.2;
+
+/// nr of bytes we use in the shader
+const SHADER_WORKGROUP_MEMORY: u32 = 480 * 8 * 4;
+// assume we have this much workgroup memory (2GB)
+const MAX_DEVICE_WORKGROUP_MEMORY: u32 = i32::MAX as u32;
+const NR_OF_DISPATCHES: u32 =
+    MAX_DEVICE_WORKGROUP_MEMORY / (SHADER_WORKGROUP_MEMORY * TOTAL_WORK_GROUPS) + 1; // TODO: use div_ceil once stabilized
+
+const OUTPUT_ARRAY_SIZE: u32 = TOTAL_WORK_GROUPS * NR_OF_DISPATCHES;
+const BUFFER_SIZE: u64 = OUTPUT_ARRAY_SIZE as u64 * 4;
+const BUFFER_BINDING_SIZE: u32 = TOTAL_WORK_GROUPS * 4;
diff --git a/tests/tests/shader/zero_init_workgroup_mem_threads.wgsl b/tests/tests/shader/zero_init_workgroup_mem_threads.wgsl
new file mode 100644
index 0000000000..0f9478a806
--- /dev/null
+++ b/tests/tests/shader/zero_init_workgroup_mem_threads.wgsl
@@ -0,0 +1,35 @@
+// Not a multiple of the workgroup size
+const array_size = 480u; // 7.5 * 64
+
+var<workgroup> w_mem_array: array<array<u32, 8>, array_size>;
+
+@group(0) @binding(0)
+var<storage, read_write> output: array<u32>;
+
+@compute @workgroup_size(8, 4, 2)
+fn read_(
+    @builtin(workgroup_id) wgid: vec3<u32>,
+    @builtin(num_workgroups) num_workgroups: vec3<u32>,
+    @builtin(local_invocation_id) local_id: vec3<u32>
+) {
+    if all(local_id == vec3(0u, 0u, 0u)) {
+        var is_zero = true;
+        for (var i = 0u; i < array_size; i++) {
+            for (var j = 0u; j < 8; j++) {
+                is_zero &= w_mem_array[i][j] == 0u;
+            }
+        }
+
+        let idx = wgid.x + (wgid.y * num_workgroups.x) + (wgid.z * num_workgroups.x * num_workgroups.y);
+        output[idx] = u32(!is_zero);
+    }
+}
+
+@compute @workgroup_size(1)
+fn write_() {
+    for (var i = 0u; i < array_size; i++) {
+        for (var j = 0u; j < 8; j++) {
+            w_mem_array[i][j] = i;
+        }
+    }
+}
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 2665463792..4a8dc05b0b 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -1639,7 +1639,7 @@ impl super::Adapter {
                 .private_caps
                 .zero_initialize_workgroup_memory
             {
-                spv::ZeroInitializeWorkgroupMemoryMode::Native
+                spv::ZeroInitializeWorkgroupMemoryMode::Polyfill
             } else {
                 spv::ZeroInitializeWorkgroupMemoryMode::Polyfill
             },