From 08332a6f1fddf9c5d161f3b79934df2cae5de11a Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Mon, 29 Jul 2024 13:28:47 +0100 Subject: [PATCH 01/10] [MLIR][GPU-LLVM] Convert `gpu.func` to `llvm.func` Add support in `-convert-gpu-to-llvm-spv` to convert `gpu.func` to `llvm.func` operations. - `spir_kernel`/`spir_func` calling conventions used for kernels/functions. - `workgroup` attributions encoded as additional `llvm.ptr<3>` arguments. - No attribute used to annotate kernels - `reqd_work_group_size` attribute used to encode `gpu.known_block_size`. **Note**: A notable missing feature that will be addressed in a follow-up PR is a `-use-bare-ptr-memref-call-conv` option to replace MemRef arguments with bare pointers to the MemRef element types instead of the current MemRef descriptor approach. Signed-off-by: Victor Perez --- .../SPIRVCommon/AttrToLLVMConverter.h | 18 ++ mlir/lib/Conversion/CMakeLists.txt | 1 + .../Conversion/GPUCommon/GPUOpsLowering.cpp | 144 ++++++--- .../lib/Conversion/GPUCommon/GPUOpsLowering.h | 51 +++- .../Conversion/GPUToLLVMSPV/CMakeLists.txt | 2 + .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 25 +- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 16 +- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 9 +- .../SPIRVCommon/AttrToLLVMConverter.cpp | 61 ++++ .../lib/Conversion/SPIRVCommon/CMakeLists.txt | 6 + .../lib/Conversion/SPIRVToLLVM/CMakeLists.txt | 1 + .../Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp | 47 +-- .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 285 ++++++++++++++++++ 13 files changed, 556 insertions(+), 110 deletions(-) create mode 100644 mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h create mode 100644 mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp create mode 100644 mlir/lib/Conversion/SPIRVCommon/CMakeLists.txt diff --git a/mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h b/mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h new file mode 100644 index 00000000000000..a99dd0fe6f133e --- /dev/null 
+++ b/mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h @@ -0,0 +1,18 @@ +//===- AttrToLLVMConverter.h - SPIR-V attributes conversion to LLVM - C++ -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_ +#define MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_ + +#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" + +namespace mlir { +unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI, + spirv::StorageClass storageClass); +} // namespace mlir + +#endif // MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_ diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index 80c8b84d9ae89a..813f700c5556e1 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -53,6 +53,7 @@ add_subdirectory(SCFToGPU) add_subdirectory(SCFToOpenMP) add_subdirectory(SCFToSPIRV) add_subdirectory(ShapeToStandard) +add_subdirectory(SPIRVCommon) add_subdirectory(SPIRVToLLVM) add_subdirectory(TensorToLinalg) add_subdirectory(TensorToSPIRV) diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 6053e34f30a418..0007294b3ff277 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -25,29 +25,58 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, Location loc = gpuFuncOp.getLoc(); SmallVector workgroupBuffers; - workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); - for (const auto [idx, attribution] : - llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { - auto type = dyn_cast(attribution.getType()); - assert(type && type.hasStaticShape() 
&& "unexpected type in attribution"); - - uint64_t numElements = type.getNumElements(); - - auto elementType = - cast(typeConverter->convertType(type.getElementType())); - auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements); - std::string name = - std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx)); - uint64_t alignment = 0; - if (auto alignAttr = - dyn_cast_or_null(gpuFuncOp.getWorkgroupAttributionAttr( - idx, LLVM::LLVMDialect::getAlignAttrName()))) - alignment = alignAttr.getInt(); - auto globalOp = rewriter.create( - gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false, - LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment, - workgroupAddrSpace); - workgroupBuffers.push_back(globalOp); + if (encodeWorkgroupAttributionsAsArguments) { + ArrayRef workgroupAttributions = + gpuFuncOp.getWorkgroupAttributions(); + std::size_t numAttributions = workgroupAttributions.size(); + + // Insert all arguments at the end. + unsigned index = gpuFuncOp.getNumArguments(); + SmallVector argIndices(numAttributions, index); + + // New arguments will simply be `llvm.ptr` with the correct address space + Type workgroupPtrType = + rewriter.getType(workgroupAddrSpace); + SmallVector argTypes(numAttributions, workgroupPtrType); + + // No argument attributes will be added + DictionaryAttr emptyDict = rewriter.getDictionaryAttr({}); + SmallVector argAttrs(numAttributions, emptyDict); + + // Location match function location + SmallVector argLocs(numAttributions, gpuFuncOp.getLoc()); + + // Perform signature modification + rewriter.modifyOpInPlace( + gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() { + static_cast(gpuFuncOp).insertArguments( + argIndices, argTypes, argAttrs, argLocs); + }); + } else { + workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); + for (const auto [idx, attribution] : + llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { + auto type = dyn_cast(attribution.getType()); + assert(type && 
type.hasStaticShape() && "unexpected type in attribution"); + + uint64_t numElements = type.getNumElements(); + + auto elementType = + cast(typeConverter->convertType(type.getElementType())); + auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements); + std::string name = + std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx)); + uint64_t alignment = 0; + if (auto alignAttr = dyn_cast_or_null( + gpuFuncOp.getWorkgroupAttributionAttr( + idx, LLVM::LLVMDialect::getAlignAttrName()))) + alignment = alignAttr.getInt(); + auto globalOp = rewriter.create( + gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false, + LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment, + workgroupAddrSpace); + workgroupBuffers.push_back(globalOp); + } } // Remap proper input types. @@ -101,16 +130,20 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, // attribute. The former is necessary for further translation while the // latter is expected by gpu.launch_func. if (gpuFuncOp.isKernel()) { - attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr()); + if (kernelAttributeName) + attributes.emplace_back(*kernelAttributeName, rewriter.getUnitAttr()); // Set the dialect-specific block size attribute if there is one. if (kernelBlockSizeAttributeName.has_value() && knownBlockSize) { attributes.emplace_back(kernelBlockSizeAttributeName.value(), knownBlockSize); } } + LLVM::CConv callingConvention = gpuFuncOp.isKernel() + ? 
kernelCallingConvention + : nonKernelCallingConvention; auto llvmFuncOp = rewriter.create( gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType, - LLVM::Linkage::External, /*dsoLocal=*/false, /*cconv=*/LLVM::CConv::C, + LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention, /*comdat=*/nullptr, attributes); { @@ -125,24 +158,49 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, rewriter.setInsertionPointToStart(&gpuFuncOp.front()); unsigned numProperArguments = gpuFuncOp.getNumArguments(); - for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) { - auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(), - global.getAddrSpace()); - Value address = rewriter.create( - loc, ptrType, global.getSymNameAttr()); - Value memory = - rewriter.create(loc, ptrType, global.getType(), address, - ArrayRef{0, 0}); - - // Build a memref descriptor pointing to the buffer to plug with the - // existing memref infrastructure. This may use more registers than - // otherwise necessary given that memref sizes are fixed, but we can try - // and canonicalize that away later. 
- Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx]; - auto type = cast(attribution.getType()); - auto descr = MemRefDescriptor::fromStaticShape( - rewriter, loc, *getTypeConverter(), type, memory); - signatureConversion.remapInput(numProperArguments + idx, descr); + if (encodeWorkgroupAttributionsAsArguments) { + unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions(); + assert(numProperArguments >= numAttributions && + "Expecting attributions to be encoded as arguments already"); + + // Arguments encoding workgroup attributions will be in positions + // [numProperArguments, numProperArguments+numAttributions) + ArrayRef attributionArguments = + gpuFuncOp.getArguments().slice(numProperArguments - numAttributions, + numAttributions); + for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal( + gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) { + auto [attribution, arg] = vals; + auto type = cast(attribution.getType()); + + // Arguments are of llvm.ptr type and attributions are of memref type: + // we need to wrap them in memref descriptors. + Value descr = MemRefDescriptor::fromStaticShape( + rewriter, loc, *getTypeConverter(), type, arg); + + // And remap the arguments + signatureConversion.remapInput(numProperArguments + idx, descr); + } + } else { + for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) { + auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(), + global.getAddrSpace()); + Value address = rewriter.create( + loc, ptrType, global.getSymNameAttr()); + Value memory = + rewriter.create(loc, ptrType, global.getType(), + address, ArrayRef{0, 0}); + + // Build a memref descriptor pointing to the buffer to plug with the + // existing memref infrastructure. This may use more registers than + // otherwise necessary given that memref sizes are fixed, but we can try + // and canonicalize that away later. 
+ Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx]; + auto type = cast(attribution.getType()); + auto descr = MemRefDescriptor::fromStaticShape( + rewriter, loc, *getTypeConverter(), type, memory); + signatureConversion.remapInput(numProperArguments + idx, descr); + } } // Rewrite private memory attributions to alloca'ed buffers. diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index 92e69badc27ddf..781bea6b09406c 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -35,16 +35,39 @@ struct GPUDynamicSharedMemoryOpLowering unsigned alignmentBit; }; +struct GPUFuncOpLoweringOptions { + /// The address space to use for `alloca`s in private memory. + unsigned allocaAddrSpace; + /// The address space to use declaring workgroup memory. + unsigned workgroupAddrSpace; + + /// The attribute name to use instead of `gpu.kernel`. + std::optional kernelAttributeName = std::nullopt; + /// The attribute name to to set block size + std::optional kernelBlockSizeAttributeName = std::nullopt; + + /// The calling convention to use for kernel functions + LLVM::CConv kernelCallingConvention = LLVM::CConv::C; + /// The calling convention to use for non-kernel functions + LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C; + + /// Whether to encode workgroup attributions as additional arguments instead + /// of a global variable. 
+ bool encodeWorkgroupAttributionsAsArguments = false; +}; + struct GPUFuncOpLowering : ConvertOpToLLVMPattern { - GPUFuncOpLowering( - const LLVMTypeConverter &converter, unsigned allocaAddrSpace, - unsigned workgroupAddrSpace, StringAttr kernelAttributeName, - std::optional kernelBlockSizeAttributeName = std::nullopt) + GPUFuncOpLowering(const LLVMTypeConverter &converter, + const GPUFuncOpLoweringOptions &options) : ConvertOpToLLVMPattern(converter), - allocaAddrSpace(allocaAddrSpace), - workgroupAddrSpace(workgroupAddrSpace), - kernelAttributeName(kernelAttributeName), - kernelBlockSizeAttributeName(kernelBlockSizeAttributeName) {} + allocaAddrSpace(options.allocaAddrSpace), + workgroupAddrSpace(options.workgroupAddrSpace), + kernelAttributeName(options.kernelAttributeName), + kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName), + kernelCallingConvention(options.kernelCallingConvention), + nonKernelCallingConvention(options.nonKernelCallingConvention), + encodeWorkgroupAttributionsAsArguments( + options.encodeWorkgroupAttributionsAsArguments) {} LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, @@ -57,10 +80,18 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { unsigned workgroupAddrSpace; /// The attribute name to use instead of `gpu.kernel`. - StringAttr kernelAttributeName; - + std::optional kernelAttributeName; /// The attribute name to to set block size std::optional kernelBlockSizeAttributeName; + + /// The calling convention to use for kernel functions + LLVM::CConv kernelCallingConvention; + /// The calling convention to use for non-kernel functions + LLVM::CConv nonKernelCallingConvention; + + /// Whether to encode workgroup attributions as additional arguments instead + /// of a global variable. 
+ bool encodeWorkgroupAttributionsAsArguments; }; /// The lowering of gpu.printf to a call to HIP hostcalls diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt b/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt index da5650b2b68dde..d47c5e679d86e8 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt @@ -6,7 +6,9 @@ add_mlir_conversion_library(MLIRGPUToLLVMSPV LINK_LIBS PUBLIC MLIRGPUDialect + MLIRGPUToGPURuntimeTransforms MLIRLLVMCommonConversion MLIRLLVMDialect + MLIRSPIRVAttrToLLVMConversion MLIRSPIRVDialect ) diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp index 27d63b5f8948d4..74dd5f19c20f5e 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp +++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp @@ -8,15 +8,18 @@ #include "mlir/Conversion/GPUToLLVMSPV/GPUToLLVMSPVPass.h" +#include "../GPUCommon/GPUOpsLowering.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" +#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" @@ -321,8 +324,8 @@ struct GPUToLLVMSPVConversionPass final LLVMConversionTarget target(*context); target.addIllegalOp(); + gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp, + gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>(); populateGpuToLLVMSPVConversionPatterns(converter, patterns); @@ -340,11 +343,27 @@ struct GPUToLLVMSPVConversionPass final namespace 
mlir { void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) { - patterns.add, LaunchConfigOpConversion, LaunchConfigOpConversion, LaunchConfigOpConversion, LaunchConfigOpConversion>(typeConverter); + constexpr spirv::ClientAPI clientAPI = spirv::ClientAPI::OpenCL; + MLIRContext *context = &typeConverter.getContext(); + unsigned privateAddressSpace = + storageClassToAddressSpace(clientAPI, spirv::StorageClass::Function); + unsigned localAddressSpace = + storageClassToAddressSpace(clientAPI, spirv::StorageClass::Workgroup); + OperationName llvmFuncOpName(LLVM::LLVMFuncOp::getOperationName(), context); + StringAttr kernelBlockSizeAttributeName = + LLVM::LLVMFuncOp::getReqdWorkGroupSizeAttrName(llvmFuncOpName); + patterns.add( + typeConverter, + GPUFuncOpLoweringOptions{ + privateAddressSpace, localAddressSpace, + /*kernelAttributeName=*/std::nullopt, kernelBlockSizeAttributeName, + LLVM::CConv::SPIR_KERNEL, LLVM::CConv::SPIR_FUNC, + /*encodeWorkgroupAttributionsAsArguments=*/true}); } } // namespace mlir diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index faa97caacb8851..060a1e1e82f75e 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -365,13 +365,15 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter, // attributions since NVVM models it as `alloca`s in the default // memory space and does not support `alloca`s with addrspace(5). 
patterns.add( - converter, /*allocaAddrSpace=*/0, - /*workgroupAddrSpace=*/ - static_cast(NVVM::NVVMMemorySpace::kSharedMemorySpace), - StringAttr::get(&converter.getContext(), - NVVM::NVVMDialect::getKernelFuncAttrName()), - StringAttr::get(&converter.getContext(), - NVVM::NVVMDialect::getMaxntidAttrName())); + converter, + GPUFuncOpLoweringOptions{ + /*allocaAddrSpace=*/0, + /*workgroupAddrSpace=*/ + static_cast(NVVM::NVVMMemorySpace::kSharedMemorySpace), + StringAttr::get(&converter.getContext(), + NVVM::NVVMDialect::getKernelFuncAttrName()), + StringAttr::get(&converter.getContext(), + NVVM::NVVMDialect::getMaxntidAttrName())}); populateOpPatterns(converter, patterns, "__nv_fmodf", "__nv_fmod"); diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 100181cdc69fe7..564bab1ad92b90 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -372,10 +372,11 @@ void mlir::populateGpuToROCDLConversionPatterns( patterns.add(converter); patterns.add( converter, - /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace, - /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace, - rocdlDialect->getKernelAttrHelper().getName(), - rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName()); + GPUFuncOpLoweringOptions{ + /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace, + /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace, + rocdlDialect->getKernelAttrHelper().getName(), + rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName()}); if (Runtime::HIP == runtime) { patterns.add(converter); } else if (Runtime::OpenCL == runtime) { diff --git a/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp b/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp new file mode 100644 index 00000000000000..924bd1643f83b4 --- /dev/null +++ 
b/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp @@ -0,0 +1,61 @@ +//===- AttrToLLVMConverter.cpp - SPIR-V attributes conversion to LLVM -C++ ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +namespace { +using namespace mlir; + +//===----------------------------------------------------------------------===// +// Constants +//===----------------------------------------------------------------------===// + +constexpr unsigned defaultAddressSpace = 0; + +//===----------------------------------------------------------------------===// +// Utility functions +//===----------------------------------------------------------------------===// + +static unsigned +storageClassToOCLAddressSpace(spirv::StorageClass storageClass) { + // Based on + // https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_binary_form + // and clang/lib/Basic/Targets/SPIR.h. 
+ switch (storageClass) { + case spirv::StorageClass::Function: + return 0; + case spirv::StorageClass::Input: + case spirv::StorageClass::CrossWorkgroup: + return 1; + case spirv::StorageClass::UniformConstant: + return 2; + case spirv::StorageClass::Workgroup: + return 3; + case spirv::StorageClass::Generic: + return 4; + case spirv::StorageClass::DeviceOnlyINTEL: + return 5; + case spirv::StorageClass::HostOnlyINTEL: + return 6; + default: + return defaultAddressSpace; + } +} +} // namespace + +namespace mlir { +unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI, + spirv::StorageClass storageClass) { + switch (clientAPI) { + case spirv::ClientAPI::OpenCL: + return storageClassToOCLAddressSpace(storageClass); + default: + return defaultAddressSpace; + } +} +} // namespace mlir diff --git a/mlir/lib/Conversion/SPIRVCommon/CMakeLists.txt b/mlir/lib/Conversion/SPIRVCommon/CMakeLists.txt new file mode 100644 index 00000000000000..cd5a4c225efbf4 --- /dev/null +++ b/mlir/lib/Conversion/SPIRVCommon/CMakeLists.txt @@ -0,0 +1,6 @@ +add_mlir_conversion_library(MLIRSPIRVAttrToLLVMConversion + AttrToLLVMConverter.cpp + + DEPENDS + MLIRSPIRVEnumsIncGen +) diff --git a/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt b/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt index 549785b154c1b2..e563315d95c9ca 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt @@ -18,6 +18,7 @@ add_mlir_conversion_library(MLIRSPIRVToLLVM MLIRLLVMCommonConversion MLIRLLVMDialect MLIRMemRefToLLVM + MLIRSPIRVAttrToLLVMConversion MLIRSPIRVDialect MLIRSPIRVUtils MLIRTransforms diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp index da09384bfbe895..ca786316324198 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp @@ -13,6 +13,7 @@ #include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVM.h" #include 
"mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" #include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" @@ -28,12 +29,6 @@ using namespace mlir; -//===----------------------------------------------------------------------===// -// Constants -//===----------------------------------------------------------------------===// - -constexpr unsigned defaultAddressSpace = 0; - //===----------------------------------------------------------------------===// // Utility functions //===----------------------------------------------------------------------===// @@ -273,47 +268,13 @@ static std::optional convertArrayType(spirv::ArrayType type, return LLVM::LLVMArrayType::get(llvmElementType, numElements); } -static unsigned mapToOpenCLAddressSpace(spirv::StorageClass storageClass) { - // Based on - // https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_binary_form - // and clang/lib/Basic/Targets/SPIR.h. 
- switch (storageClass) { -#define STORAGE_SPACE_MAP(storage, space) \ - case spirv::StorageClass::storage: \ - return space; - STORAGE_SPACE_MAP(Function, 0) - STORAGE_SPACE_MAP(CrossWorkgroup, 1) - STORAGE_SPACE_MAP(Input, 1) - STORAGE_SPACE_MAP(UniformConstant, 2) - STORAGE_SPACE_MAP(Workgroup, 3) - STORAGE_SPACE_MAP(Generic, 4) - STORAGE_SPACE_MAP(DeviceOnlyINTEL, 5) - STORAGE_SPACE_MAP(HostOnlyINTEL, 6) -#undef STORAGE_SPACE_MAP - default: - return defaultAddressSpace; - } -} - -static unsigned mapToAddressSpace(spirv::ClientAPI clientAPI, - spirv::StorageClass storageClass) { - switch (clientAPI) { -#define CLIENT_MAP(client, storage) \ - case spirv::ClientAPI::client: \ - return mapTo##client##AddressSpace(storage); - CLIENT_MAP(OpenCL, storageClass) -#undef CLIENT_MAP - default: - return defaultAddressSpace; - } -} - /// Converts SPIR-V pointer type to LLVM pointer. Pointer's storage class is not /// modelled at the moment. static Type convertPointerType(spirv::PointerType type, LLVMTypeConverter &converter, spirv::ClientAPI clientAPI) { - unsigned addressSpace = mapToAddressSpace(clientAPI, type.getStorageClass()); + unsigned addressSpace = + storageClassToAddressSpace(clientAPI, type.getStorageClass()); return LLVM::LLVMPointerType::get(type.getContext(), addressSpace); } @@ -822,7 +783,7 @@ class GlobalVariablePattern : LLVM::Linkage::External; auto newGlobalOp = rewriter.replaceOpWithNewOp( op, dstType, isConstant, linkage, op.getSymName(), Attribute(), - /*alignment=*/0, mapToAddressSpace(clientAPI, storageClass)); + /*alignment=*/0, storageClassToAddressSpace(clientAPI, storageClass)); // Attach location attribute if applicable if (op.getLocationAttr()) diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index bd7e5d139b0010..ce3cc9a6137d35 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -377,3 
+377,288 @@ gpu.module @shuffles_mismatch { return } } + +// ----- + +gpu.module @kernels { +// CHECK: llvm.func spir_funccc @no_kernel() { + gpu.func @no_kernel() { + gpu.return + } + +// CHECK: llvm.func spir_kernelcc @kernel_no_arg() attributes {gpu.kernel} { + gpu.func @kernel_no_arg() kernel { + gpu.return + } + +// CHECK: llvm.func spir_kernelcc @kernel_with_args(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: i64) attributes {gpu.kernel} { + gpu.func @kernel_with_args(%arg0: f32, %arg1: i64) kernel { + gpu.return + } + +// CHECK-64: llvm.func spir_kernelcc @kernel_with_conv_args(%[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: i64) attributes {gpu.kernel} { +// CHECK-32: llvm.func spir_kernelcc @kernel_with_conv_args(%[[VAL_2:.*]]: i32, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: i32) attributes {gpu.kernel} { + gpu.func @kernel_with_conv_args(%arg0: index, %arg1: memref) kernel { + gpu.return + } + +// CHECK-64: llvm.func spir_kernelcc @kernel_with_sized_memref(%[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr, %[[VAL_8:.*]]: i64, %[[VAL_9:.*]]: i64, %[[VAL_10:.*]]: i64) attributes {gpu.kernel} { +// CHECK-32: llvm.func spir_kernelcc @kernel_with_sized_memref(%[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr, %[[VAL_8:.*]]: i32, %[[VAL_9:.*]]: i32, %[[VAL_10:.*]]: i32) attributes {gpu.kernel} { + gpu.func @kernel_with_sized_memref(%arg0: memref<1xindex>) kernel { + gpu.return + } + +// CHECK-64: llvm.func spir_kernelcc @kernel_with_ND_memref(%[[VAL_11:.*]]: !llvm.ptr, %[[VAL_12:.*]]: !llvm.ptr, %[[VAL_13:.*]]: i64, %[[VAL_14:.*]]: i64, %[[VAL_15:.*]]: i64, %[[VAL_16:.*]]: i64, %[[VAL_17:.*]]: i64, %[[VAL_18:.*]]: i64, %[[VAL_19:.*]]: i64) attributes {gpu.kernel} { +// CHECK-32: llvm.func spir_kernelcc @kernel_with_ND_memref(%[[VAL_11:.*]]: !llvm.ptr, %[[VAL_12:.*]]: !llvm.ptr, %[[VAL_13:.*]]: i32, %[[VAL_14:.*]]: i32, %[[VAL_15:.*]]: i32, %[[VAL_16:.*]]: i32, %[[VAL_17:.*]]: i32, %[[VAL_18:.*]]: i32, 
%[[VAL_19:.*]]: i32) attributes {gpu.kernel} { + gpu.func @kernel_with_ND_memref(%arg0: memref<128x128x128xindex>) kernel { + gpu.return + } +} + +// ----- + +gpu.module @kernels { +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_private_attribs( +// CHECK-SAME: %[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: i16) attributes {gpu.kernel} { +// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(32 : i64) : i64 +// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x f32 : (i64) -> !llvm.ptr + +// CHECK-64: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_6:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_7:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-64: %[[VAL_8:.*]] = llvm.insertvalue %[[VAL_7]], %[[VAL_6]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_9:.*]] = llvm.mlir.constant(32 : index) : i64 +// CHECK-64: %[[VAL_10:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_8]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_11:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-64: %[[VAL_12:.*]] = llvm.insertvalue %[[VAL_11]], %[[VAL_10]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32> + +// CHECK-32: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_6:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] : !llvm.struct<(ptr, ptr, i32, 
array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_7:.*]] = llvm.mlir.constant(0 : index) : i32 +// CHECK-32: %[[VAL_8:.*]] = llvm.insertvalue %[[VAL_7]], %[[VAL_6]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_9:.*]] = llvm.mlir.constant(32 : index) : i32 +// CHECK-32: %[[VAL_10:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_8]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_11:.*]] = llvm.mlir.constant(1 : index) : i32 +// CHECK-32: %[[VAL_12:.*]] = llvm.insertvalue %[[VAL_11]], %[[VAL_10]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<32xf32> + +// CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(16 : i64) : i64 +// CHECK: %[[VAL_15:.*]] = llvm.alloca %[[VAL_14]] x i16 : (i64) -> !llvm.ptr + +// CHECK-64: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_18:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_19:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-64: %[[VAL_20:.*]] = llvm.insertvalue %[[VAL_19]], %[[VAL_18]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_21:.*]] = llvm.mlir.constant(16 : index) : i64 +// CHECK-64: %[[VAL_22:.*]] = llvm.insertvalue %[[VAL_21]], %[[VAL_20]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_23:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-64: %[[VAL_24:.*]] = llvm.insertvalue %[[VAL_23]], %[[VAL_22]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: 
%[[VAL_25:.*]] = builtin.unrealized_conversion_cast %[[VAL_24]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16> + +// CHECK-32: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_18:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_19:.*]] = llvm.mlir.constant(0 : index) : i32 +// CHECK-32: %[[VAL_20:.*]] = llvm.insertvalue %[[VAL_19]], %[[VAL_18]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_21:.*]] = llvm.mlir.constant(16 : index) : i32 +// CHECK-32: %[[VAL_22:.*]] = llvm.insertvalue %[[VAL_21]], %[[VAL_20]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_23:.*]] = llvm.mlir.constant(1 : index) : i32 +// CHECK-32: %[[VAL_24:.*]] = llvm.insertvalue %[[VAL_23]], %[[VAL_22]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_25:.*]] = builtin.unrealized_conversion_cast %[[VAL_24]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16> + +// CHECK: %[[VAL_26:.*]] = arith.constant 0 : index +// CHECK: memref.store %[[VAL_0]], %[[VAL_13]]{{\[}}%[[VAL_26]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_1]], %[[VAL_25]]{{\[}}%[[VAL_26]]] : memref<16xi16> + gpu.func @kernel_with_private_attribs(%arg0: f32, %arg1: i16) + private(%arg2: memref<32xf32>, %arg3: memref<16xi16>) + kernel { + %c0 = arith.constant 0 : index + memref.store %arg0, %arg2[%c0] : memref<32xf32> + memref.store %arg1, %arg3[%c0] : memref<16xi16> + gpu.return + } + +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgoup_attribs( +// CHECK-SAME: %[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: i16, %[[VAL_29:.*]]: !llvm.ptr<3>, 
%[[VAL_30:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} { + +// CHECK-64: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_33:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_34:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-64: %[[VAL_35:.*]] = llvm.insertvalue %[[VAL_34]], %[[VAL_33]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_36:.*]] = llvm.mlir.constant(32 : index) : i64 +// CHECK-64: %[[VAL_37:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_35]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_38:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-64: %[[VAL_39:.*]] = llvm.insertvalue %[[VAL_38]], %[[VAL_37]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_40:.*]] = builtin.unrealized_conversion_cast %[[VAL_39]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32, 3> +// CHECK-64: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_44:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-64: %[[VAL_45:.*]] = llvm.insertvalue %[[VAL_44]], %[[VAL_43]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_46:.*]] = llvm.mlir.constant(16 : index) : i64 +// CHECK-64: 
%[[VAL_47:.*]] = llvm.insertvalue %[[VAL_46]], %[[VAL_45]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_48:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-64: %[[VAL_49:.*]] = llvm.insertvalue %[[VAL_48]], %[[VAL_47]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_50:.*]] = builtin.unrealized_conversion_cast %[[VAL_49]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16, 3> + +// CHECK-32: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_33:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_34:.*]] = llvm.mlir.constant(0 : index) : i32 +// CHECK-32: %[[VAL_35:.*]] = llvm.insertvalue %[[VAL_34]], %[[VAL_33]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_36:.*]] = llvm.mlir.constant(32 : index) : i32 +// CHECK-32: %[[VAL_37:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_35]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_38:.*]] = llvm.mlir.constant(1 : index) : i32 +// CHECK-32: %[[VAL_39:.*]] = llvm.insertvalue %[[VAL_38]], %[[VAL_37]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_40:.*]] = builtin.unrealized_conversion_cast %[[VAL_39]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<32xf32, 3> +// CHECK-32: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 
x i32>)> +// CHECK-32: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_44:.*]] = llvm.mlir.constant(0 : index) : i32 +// CHECK-32: %[[VAL_45:.*]] = llvm.insertvalue %[[VAL_44]], %[[VAL_43]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_46:.*]] = llvm.mlir.constant(16 : index) : i32 +// CHECK-32: %[[VAL_47:.*]] = llvm.insertvalue %[[VAL_46]], %[[VAL_45]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_48:.*]] = llvm.mlir.constant(1 : index) : i32 +// CHECK-32: %[[VAL_49:.*]] = llvm.insertvalue %[[VAL_48]], %[[VAL_47]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_50:.*]] = builtin.unrealized_conversion_cast %[[VAL_49]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16, 3> + +// CHECK: %[[VAL_51:.*]] = arith.constant 0 : index +// CHECK: memref.store %[[VAL_27]], %[[VAL_40]]{{\[}}%[[VAL_51]]] : memref<32xf32, 3> +// CHECK: memref.store %[[VAL_28]], %[[VAL_50]]{{\[}}%[[VAL_51]]] : memref<16xi16, 3> + gpu.func @kernel_with_workgoup_attribs(%arg0: f32, %arg1: i16) + workgroup(%arg2: memref<32xf32, 3>, %arg3: memref<16xi16, 3>) + kernel { + %c0 = arith.constant 0 : index + memref.store %arg0, %arg2[%c0] : memref<32xf32, 3> + memref.store %arg1, %arg3[%c0] : memref<16xi16, 3> + gpu.return + } + +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attribs( +// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3>, %[[VAL_57:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} { +// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3>, %[[VAL_57:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} { + +// CHECK-64: %[[VAL_58:.*]] = 
builtin.unrealized_conversion_cast %[[VAL_55]] : i64 to index +// CHECK-64: %[[VAL_59:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_60:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_59]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_61:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_60]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_62:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-64: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_62]], %[[VAL_61]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_64:.*]] = llvm.mlir.constant(32 : index) : i64 +// CHECK-64: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_66:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-64: %[[VAL_67:.*]] = llvm.insertvalue %[[VAL_66]], %[[VAL_65]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_68:.*]] = builtin.unrealized_conversion_cast %[[VAL_67]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32, 3> +// CHECK-64: %[[VAL_69:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_70:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_69]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_71:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_70]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_72:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-64: %[[VAL_73:.*]] = llvm.insertvalue %[[VAL_72]], %[[VAL_71]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_74:.*]] = llvm.mlir.constant(16 : index) : i64 +// CHECK-64: 
%[[VAL_75:.*]] = llvm.insertvalue %[[VAL_74]], %[[VAL_73]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_76:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-64: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_76]], %[[VAL_75]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_78:.*]] = builtin.unrealized_conversion_cast %[[VAL_77]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16, 3> + +// CHECK-32: %[[VAL_58:.*]] = builtin.unrealized_conversion_cast %[[VAL_55]] : i32 to index +// CHECK-32: %[[VAL_59:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_60:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_59]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_61:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_60]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_62:.*]] = llvm.mlir.constant(0 : index) : i32 +// CHECK-32: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_62]], %[[VAL_61]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_64:.*]] = llvm.mlir.constant(32 : index) : i32 +// CHECK-32: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_66:.*]] = llvm.mlir.constant(1 : index) : i32 +// CHECK-32: %[[VAL_67:.*]] = llvm.insertvalue %[[VAL_66]], %[[VAL_65]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_68:.*]] = builtin.unrealized_conversion_cast %[[VAL_67]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<32xf32, 3> +// CHECK-32: %[[VAL_69:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_70:.*]] = 
llvm.insertvalue %[[VAL_57]], %[[VAL_69]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_71:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_70]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_72:.*]] = llvm.mlir.constant(0 : index) : i32 +// CHECK-32: %[[VAL_73:.*]] = llvm.insertvalue %[[VAL_72]], %[[VAL_71]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_74:.*]] = llvm.mlir.constant(16 : index) : i32 +// CHECK-32: %[[VAL_75:.*]] = llvm.insertvalue %[[VAL_74]], %[[VAL_73]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_76:.*]] = llvm.mlir.constant(1 : index) : i32 +// CHECK-32: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_76]], %[[VAL_75]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_78:.*]] = builtin.unrealized_conversion_cast %[[VAL_77]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16, 3> + +// CHECK: %[[VAL_79:.*]] = llvm.mlir.constant(32 : i64) : i64 +// CHECK: %[[VAL_80:.*]] = llvm.alloca %[[VAL_79]] x i32 : (i64) -> !llvm.ptr + +// CHECK-64: %[[VAL_81:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_82:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_81]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_83:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_82]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_84:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-64: %[[VAL_85:.*]] = llvm.insertvalue %[[VAL_84]], %[[VAL_83]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_86:.*]] = llvm.mlir.constant(32 : index) : i64 +// CHECK-64: %[[VAL_87:.*]] = llvm.insertvalue %[[VAL_86]], %[[VAL_85]][3, 0] : !llvm.struct<(ptr, ptr, i64, 
array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_88:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-64: %[[VAL_89:.*]] = llvm.insertvalue %[[VAL_88]], %[[VAL_87]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_90:.*]] = builtin.unrealized_conversion_cast %[[VAL_89]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xi32> + +// CHECK-32: %[[VAL_81:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_82:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_81]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_83:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_82]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_84:.*]] = llvm.mlir.constant(0 : index) : i32 +// CHECK-32: %[[VAL_85:.*]] = llvm.insertvalue %[[VAL_84]], %[[VAL_83]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_86:.*]] = llvm.mlir.constant(32 : index) : i32 +// CHECK-32: %[[VAL_87:.*]] = llvm.insertvalue %[[VAL_86]], %[[VAL_85]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_88:.*]] = llvm.mlir.constant(1 : index) : i32 +// CHECK-32: %[[VAL_89:.*]] = llvm.insertvalue %[[VAL_88]], %[[VAL_87]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_90:.*]] = builtin.unrealized_conversion_cast %[[VAL_89]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<32xi32> + +// CHECK: %[[VAL_91:.*]] = llvm.mlir.constant(32 : i64) : i64 + +// CHECK-64: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i64 : (i64) -> !llvm.ptr +// CHECK-32: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i32 : (i64) -> !llvm.ptr + +// CHECK-64: %[[VAL_93:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_94:.*]] = llvm.insertvalue %[[VAL_92]], 
%[[VAL_93]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_95:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_94]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_96:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK-64: %[[VAL_97:.*]] = llvm.insertvalue %[[VAL_96]], %[[VAL_95]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_98:.*]] = llvm.mlir.constant(32 : index) : i64 +// CHECK-64: %[[VAL_99:.*]] = llvm.insertvalue %[[VAL_98]], %[[VAL_97]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_100:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK-64: %[[VAL_101:.*]] = llvm.insertvalue %[[VAL_100]], %[[VAL_99]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-64: %[[VAL_102:.*]] = builtin.unrealized_conversion_cast %[[VAL_101]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xindex> + +// CHECK-32: %[[VAL_93:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_94:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_93]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_95:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_94]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_96:.*]] = llvm.mlir.constant(0 : index) : i32 +// CHECK-32: %[[VAL_97:.*]] = llvm.insertvalue %[[VAL_96]], %[[VAL_95]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_98:.*]] = llvm.mlir.constant(32 : index) : i32 +// CHECK-32: %[[VAL_99:.*]] = llvm.insertvalue %[[VAL_98]], %[[VAL_97]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_100:.*]] = llvm.mlir.constant(1 : index) : i32 +// CHECK-32: %[[VAL_101:.*]] = llvm.insertvalue %[[VAL_100]], %[[VAL_99]][4, 0] : !llvm.struct<(ptr, ptr, 
i32, array<1 x i32>, array<1 x i32>)> +// CHECK-32: %[[VAL_102:.*]] = builtin.unrealized_conversion_cast %[[VAL_101]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<32xindex> + +// CHECK: %[[VAL_103:.*]] = arith.constant 0 : index +// CHECK: memref.store %[[VAL_52]], %[[VAL_68]]{{\[}}%[[VAL_103]]] : memref<32xf32, 3> +// CHECK: memref.store %[[VAL_53]], %[[VAL_78]]{{\[}}%[[VAL_103]]] : memref<16xi16, 3> +// CHECK: memref.store %[[VAL_54]], %[[VAL_90]]{{\[}}%[[VAL_103]]] : memref<32xi32> +// CHECK: memref.store %[[VAL_58]], %[[VAL_102]]{{\[}}%[[VAL_103]]] : memref<32xindex> + gpu.func @kernel_with_both_attribs(%arg0: f32, %arg1: i16, %arg2: i32, %arg3: index) + workgroup(%arg4: memref<32xf32, 3>, %arg5: memref<16xi16, 3>) + private(%arg6: memref<32xi32>, %arg7: memref<32xindex>) + kernel { + %c0 = arith.constant 0 : index + memref.store %arg0, %arg4[%c0] : memref<32xf32, 3> + memref.store %arg1, %arg5[%c0] : memref<16xi16, 3> + memref.store %arg2, %arg6[%c0] : memref<32xi32> + memref.store %arg3, %arg7[%c0] : memref<32xindex> + gpu.return + } + +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_known_block_size +// CHECK-SAME: reqd_work_group_size = array<i32: 128, 128, 256> + gpu.func @kernel_known_block_size() kernel attributes {known_block_size = array<i32: 128, 128, 256>} { + gpu.return + } +} From 098af95fc1b99de2dd1dd7025619eaf535840c58 Mon Sep 17 00:00:00 2001 From: Victor Perez <victor.perez@codeplay.com> Date: Mon, 5 Aug 2024 11:16:58 +0100 Subject: [PATCH 02/10] Apply suggestions and implement `llvm.mlir.workgroup_attrib_size` --- .../mlir/Dialect/LLVMIR/LLVMDialect.td | 4 + .../Conversion/GPUCommon/GPUOpsLowering.cpp | 49 +++- .../lib/Conversion/GPUCommon/GPUOpsLowering.h | 11 +- .../SPIRVCommon/AttrToLLVMConverter.cpp | 3 +- .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 232 +++--------------- 5 files changed, 97 insertions(+), 202 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td index c4c011f30b3bcd..8e933afbb02f1c 100644 --- 
a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td @@ -58,6 +58,10 @@ def LLVM_Dialect : Dialect { /// effect when lowering to the LLVMDialect. static StringRef getReadnoneAttrName() { return "llvm.readnone"; } + /// Name of the helper attribute to keep GPU workgroup attribution size + /// information when converting from GPU to LLVM. + static StringRef getWorkgroupAttribSizeAttrName() { return "llvm.mlir.workgroup_attrib_size"; } + /// Verifies if the given string is a well-formed data layout descriptor. /// Uses `reportError` to report errors. static LogicalResult verifyDataLayoutString( diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 0007294b3ff277..b0d217650ba5f6 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -8,6 +8,7 @@ #include "GPUOpsLowering.h" +#include "mlir/Analysis/DataLayoutAnalysis.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Attributes.h" @@ -19,6 +20,22 @@ using namespace mlir; +namespace { +constexpr int64_t sizeQueryFailure = 0; + +static int64_t getAttributionSize(BlockArgument attribution, + const LLVMTypeConverter &converter, + const DataLayout &layout) { + auto attributionType = cast<MemRefType>(attribution.getType()); + int64_t numElements = attributionType.getNumElements(); + Type elementType = converter.convertType(attributionType.getElementType()); + if (!elementType) + return sizeQueryFailure; + int64_t elementTypeSize = layout.getTypeSize(elementType); + return numElements * elementTypeSize; +} +} // namespace + LogicalResult GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { @@ -28,7 +45,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, if 
(encodeWorkgroupAttributionsAsArguments) { ArrayRef<BlockArgument> workgroupAttributions = gpuFuncOp.getWorkgroupAttributions(); - std::size_t numAttributions = workgroupAttributions.size(); + size_t numAttributions = workgroupAttributions.size(); // Insert all arguments at the end. unsigned index = gpuFuncOp.getNumArguments(); @@ -39,9 +56,30 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace); SmallVector<Type> argTypes(numAttributions, workgroupPtrType); - // No argument attributes will be added - DictionaryAttr emptyDict = rewriter.getDictionaryAttr({}); - SmallVector<DictionaryAttr> argAttrs(numAttributions, emptyDict); + // Attributes: noalias, llvm.mlir.workgroup_attrib_size(<size>) + std::array<NamedAttribute, 2> attrs{ + rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(), + rewriter.getUnitAttr()), + rewriter.getNamedAttr( + LLVM::LLVMDialect::getWorkgroupAttribSizeAttrName(), + rewriter.getUnitAttr()), + }; + SmallVector<DictionaryAttr> argAttrs; + assert(defaultLayout && "Expecting defaultLayout to be initialized"); + const DataLayout *layout = &*defaultLayout; + if (const DataLayoutAnalysis *analysis = + getTypeConverter()->getDataLayoutAnalysis()) { + layout = &analysis->getAbove(gpuFuncOp); + } + for (BlockArgument attribution : workgroupAttributions) { + int64_t dataSize = + getAttributionSize(attribution, *getTypeConverter(), *layout); + // Check for special failure value + if (dataSize == sizeQueryFailure) + return failure(); + attrs.back().setValue(rewriter.getI64IntegerAttr(dataSize)); + argAttrs.push_back(rewriter.getDictionaryAttr(attrs)); + } // Location match function location SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc()); @@ -54,7 +92,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, }); } else { workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); - for (const auto [idx, attribution] : + for (auto [idx, attribution] : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { auto 
type = dyn_cast<MemRefType>(attribution.getType()); assert(type && type.hasStaticShape() && "unexpected type in attribution"); @@ -297,6 +335,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName()); copyPointerAttribute( LLVM::LLVMDialect::getDereferenceableOrNullAttrName()); + copyPointerAttribute(LLVM::LLVMDialect::getWorkgroupAttribSizeAttrName()); } } rewriter.eraseOp(gpuFuncOp); diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index 781bea6b09406c..0c8213c205269a 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -67,7 +67,10 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> { kernelCallingConvention(options.kernelCallingConvention), nonKernelCallingConvention(options.nonKernelCallingConvention), encodeWorkgroupAttributionsAsArguments( - options.encodeWorkgroupAttributionsAsArguments) {} + options.encodeWorkgroupAttributionsAsArguments), + defaultLayout(options.encodeWorkgroupAttributionsAsArguments + ? std::optional<DataLayout>(DataLayout()) + : std::optional<DataLayout>()) {} LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, @@ -92,6 +95,12 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> { /// Whether to encode workgroup attributions as additional arguments instead /// of a global variable. bool encodeWorkgroupAttributionsAsArguments; + + /// Default layout to use in absence of the corresponding analysis. /// This will only be initialized if /// encodeWorkgroupAttributionsAsArguments=true, as it will remain unused /// otherwise. 
+ std::optional<DataLayout> defaultLayout; }; /// The lowering of gpu.printf to a call to HIP hostcalls diff --git a/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp b/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp index 924bd1643f83b4..7f83a474c3f93c 100644 --- a/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp +++ b/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp @@ -8,8 +8,8 @@ #include <mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h> +namespace mlir { namespace { -using namespace mlir; //===----------------------------------------------------------------------===// // Constants @@ -48,7 +48,6 @@ storageClassToOCLAddressSpace(spirv::StorageClass storageClass) { } } // namespace -namespace mlir { unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI, spirv::StorageClass storageClass) { switch (clientAPI) { diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index ce3cc9a6137d35..f7dfa40b2da714 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -420,239 +420,83 @@ gpu.module @kernels { gpu.module @kernels { // CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_private_attribs( // CHECK-SAME: %[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: i16) attributes {gpu.kernel} { + +// Private attribution is converted to an llvm.alloca + // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(32 : i64) : i64 // CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x f32 : (i64) -> !llvm.ptr -// CHECK-64: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_6:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_7:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-64: %[[VAL_8:.*]] = 
llvm.insertvalue %[[VAL_7]], %[[VAL_6]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_9:.*]] = llvm.mlir.constant(32 : index) : i64 -// CHECK-64: %[[VAL_10:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_8]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_11:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-64: %[[VAL_12:.*]] = llvm.insertvalue %[[VAL_11]], %[[VAL_10]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32> +// MemRef descriptor built from allocated pointer +// CHECK-64: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK-32: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_6:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_7:.*]] = llvm.mlir.constant(0 : index) : i32 -// CHECK-32: %[[VAL_8:.*]] = llvm.insertvalue %[[VAL_7]], %[[VAL_6]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_9:.*]] = llvm.mlir.constant(32 : index) : i32 -// CHECK-32: %[[VAL_10:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_8]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_11:.*]] = llvm.mlir.constant(1 : index) : i32 -// CHECK-32: %[[VAL_12:.*]] = llvm.insertvalue %[[VAL_11]], %[[VAL_10]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to 
memref<32xf32> + +// CHECK: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] +// CHECK: llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] + +// Same code as above // CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(16 : i64) : i64 // CHECK: %[[VAL_15:.*]] = llvm.alloca %[[VAL_14]] x i16 : (i64) -> !llvm.ptr // CHECK-64: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_18:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_19:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-64: %[[VAL_20:.*]] = llvm.insertvalue %[[VAL_19]], %[[VAL_18]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_21:.*]] = llvm.mlir.constant(16 : index) : i64 -// CHECK-64: %[[VAL_22:.*]] = llvm.insertvalue %[[VAL_21]], %[[VAL_20]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_23:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-64: %[[VAL_24:.*]] = llvm.insertvalue %[[VAL_23]], %[[VAL_22]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_25:.*]] = builtin.unrealized_conversion_cast %[[VAL_24]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16> - // CHECK-32: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_18:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_19:.*]] = llvm.mlir.constant(0 : index) : i32 -// CHECK-32: %[[VAL_20:.*]] = llvm.insertvalue 
%[[VAL_19]], %[[VAL_18]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_21:.*]] = llvm.mlir.constant(16 : index) : i32 -// CHECK-32: %[[VAL_22:.*]] = llvm.insertvalue %[[VAL_21]], %[[VAL_20]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_23:.*]] = llvm.mlir.constant(1 : index) : i32 -// CHECK-32: %[[VAL_24:.*]] = llvm.insertvalue %[[VAL_23]], %[[VAL_22]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_25:.*]] = builtin.unrealized_conversion_cast %[[VAL_24]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16> - -// CHECK: %[[VAL_26:.*]] = arith.constant 0 : index -// CHECK: memref.store %[[VAL_0]], %[[VAL_13]]{{\[}}%[[VAL_26]]] : memref<32xf32> -// CHECK: memref.store %[[VAL_1]], %[[VAL_25]]{{\[}}%[[VAL_26]]] : memref<16xi16> + +// CHECK: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] +// CHECK: llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] gpu.func @kernel_with_private_attribs(%arg0: f32, %arg1: i16) private(%arg2: memref<32xf32>, %arg3: memref<16xi16>) kernel { - %c0 = arith.constant 0 : index - memref.store %arg0, %arg2[%c0] : memref<32xf32> - memref.store %arg1, %arg3[%c0] : memref<16xi16> gpu.return } +// Workgroup attributions are converted to an llvm.ptr<3> argument + // CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgoup_attribs( -// CHECK-SAME: %[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: i16, %[[VAL_29:.*]]: !llvm.ptr<3>, %[[VAL_30:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} { +// CHECK-SAME: %[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: i16, +// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 128 : i64, llvm.noalias}, +// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 32 : i64, llvm.noalias}) attributes {gpu.kernel} { -// CHECK-64: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// 
CHECK-64: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_33:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_34:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-64: %[[VAL_35:.*]] = llvm.insertvalue %[[VAL_34]], %[[VAL_33]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_36:.*]] = llvm.mlir.constant(32 : index) : i64 -// CHECK-64: %[[VAL_37:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_35]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_38:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-64: %[[VAL_39:.*]] = llvm.insertvalue %[[VAL_38]], %[[VAL_37]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_40:.*]] = builtin.unrealized_conversion_cast %[[VAL_39]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32, 3> -// CHECK-64: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_44:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-64: %[[VAL_45:.*]] = llvm.insertvalue %[[VAL_44]], %[[VAL_43]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_46:.*]] = llvm.mlir.constant(16 : index) : i64 -// CHECK-64: %[[VAL_47:.*]] = llvm.insertvalue %[[VAL_46]], %[[VAL_45]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_48:.*]] = llvm.mlir.constant(1 : 
index) : i64 -// CHECK-64: %[[VAL_49:.*]] = llvm.insertvalue %[[VAL_48]], %[[VAL_47]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_50:.*]] = builtin.unrealized_conversion_cast %[[VAL_49]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16, 3> +// MemRef descriptor built from new argument +// CHECK-64: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> // CHECK-32: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_33:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_34:.*]] = llvm.mlir.constant(0 : index) : i32 -// CHECK-32: %[[VAL_35:.*]] = llvm.insertvalue %[[VAL_34]], %[[VAL_33]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_36:.*]] = llvm.mlir.constant(32 : index) : i32 -// CHECK-32: %[[VAL_37:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_35]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_38:.*]] = llvm.mlir.constant(1 : index) : i32 -// CHECK-32: %[[VAL_39:.*]] = llvm.insertvalue %[[VAL_38]], %[[VAL_37]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_40:.*]] = builtin.unrealized_conversion_cast %[[VAL_39]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<32xf32, 3> + +// CHECK: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] +// CHECK: llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] + +// Same as above + +// CHECK-64: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> // CHECK-32: 
%[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_44:.*]] = llvm.mlir.constant(0 : index) : i32 -// CHECK-32: %[[VAL_45:.*]] = llvm.insertvalue %[[VAL_44]], %[[VAL_43]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_46:.*]] = llvm.mlir.constant(16 : index) : i32 -// CHECK-32: %[[VAL_47:.*]] = llvm.insertvalue %[[VAL_46]], %[[VAL_45]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_48:.*]] = llvm.mlir.constant(1 : index) : i32 -// CHECK-32: %[[VAL_49:.*]] = llvm.insertvalue %[[VAL_48]], %[[VAL_47]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_50:.*]] = builtin.unrealized_conversion_cast %[[VAL_49]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16, 3> - -// CHECK: %[[VAL_51:.*]] = arith.constant 0 : index -// CHECK: memref.store %[[VAL_27]], %[[VAL_40]]{{\[}}%[[VAL_51]]] : memref<32xf32, 3> -// CHECK: memref.store %[[VAL_28]], %[[VAL_50]]{{\[}}%[[VAL_51]]] : memref<16xi16, 3> + +// CHECK: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] +// CHECK: llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] gpu.func @kernel_with_workgoup_attribs(%arg0: f32, %arg1: i16) workgroup(%arg2: memref<32xf32, 3>, %arg3: memref<16xi16, 3>) kernel { - %c0 = arith.constant 0 : index - memref.store %arg0, %arg2[%c0] : memref<32xf32, 3> - memref.store %arg1, %arg3[%c0] : memref<16xi16, 3> gpu.return } +// Check with both private and workgroup attributions. Simply check additional +// arguments and a llvm.alloca are present. 
+ // CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attribs( -// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3>, %[[VAL_57:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} { -// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3>, %[[VAL_57:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} { - -// CHECK-64: %[[VAL_58:.*]] = builtin.unrealized_conversion_cast %[[VAL_55]] : i64 to index -// CHECK-64: %[[VAL_59:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_60:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_59]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_61:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_60]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_62:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-64: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_62]], %[[VAL_61]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_64:.*]] = llvm.mlir.constant(32 : index) : i64 -// CHECK-64: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_66:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-64: %[[VAL_67:.*]] = llvm.insertvalue %[[VAL_66]], %[[VAL_65]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_68:.*]] = builtin.unrealized_conversion_cast %[[VAL_67]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32, 3> -// CHECK-64: %[[VAL_69:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_70:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_69]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, 
array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_71:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_70]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_72:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-64: %[[VAL_73:.*]] = llvm.insertvalue %[[VAL_72]], %[[VAL_71]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_74:.*]] = llvm.mlir.constant(16 : index) : i64 -// CHECK-64: %[[VAL_75:.*]] = llvm.insertvalue %[[VAL_74]], %[[VAL_73]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_76:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-64: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_76]], %[[VAL_75]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_78:.*]] = builtin.unrealized_conversion_cast %[[VAL_77]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16, 3> - -// CHECK-32: %[[VAL_58:.*]] = builtin.unrealized_conversion_cast %[[VAL_55]] : i32 to index -// CHECK-32: %[[VAL_59:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_60:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_59]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_61:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_60]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_62:.*]] = llvm.mlir.constant(0 : index) : i32 -// CHECK-32: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_62]], %[[VAL_61]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_64:.*]] = llvm.mlir.constant(32 : index) : i32 -// CHECK-32: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_66:.*]] = llvm.mlir.constant(1 : index) : i32 -// 
CHECK-32: %[[VAL_67:.*]] = llvm.insertvalue %[[VAL_66]], %[[VAL_65]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_68:.*]] = builtin.unrealized_conversion_cast %[[VAL_67]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<32xf32, 3> -// CHECK-32: %[[VAL_69:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_70:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_69]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_71:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_70]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_72:.*]] = llvm.mlir.constant(0 : index) : i32 -// CHECK-32: %[[VAL_73:.*]] = llvm.insertvalue %[[VAL_72]], %[[VAL_71]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_74:.*]] = llvm.mlir.constant(16 : index) : i32 -// CHECK-32: %[[VAL_75:.*]] = llvm.insertvalue %[[VAL_74]], %[[VAL_73]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_76:.*]] = llvm.mlir.constant(1 : index) : i32 -// CHECK-32: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_76]], %[[VAL_75]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_78:.*]] = builtin.unrealized_conversion_cast %[[VAL_77]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16, 3> +// CHECK-SAME: %[[VAL_52:.*]]: f32, +// CHECK-SAME: %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, +// CHECK-64-SAME: %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 32 : i64, llvm.noalias}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 128 : i64, llvm.noalias}) attributes {gpu.kernel} { +// CHECK-32-SAME %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 32 : i64, 
llvm.noalias}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 64 : i64, llvm.noalias}) attributes {gpu.kernel} { // CHECK: %[[VAL_79:.*]] = llvm.mlir.constant(32 : i64) : i64 // CHECK: %[[VAL_80:.*]] = llvm.alloca %[[VAL_79]] x i32 : (i64) -> !llvm.ptr -// CHECK-64: %[[VAL_81:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_82:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_81]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_83:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_82]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_84:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-64: %[[VAL_85:.*]] = llvm.insertvalue %[[VAL_84]], %[[VAL_83]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_86:.*]] = llvm.mlir.constant(32 : index) : i64 -// CHECK-64: %[[VAL_87:.*]] = llvm.insertvalue %[[VAL_86]], %[[VAL_85]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_88:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-64: %[[VAL_89:.*]] = llvm.insertvalue %[[VAL_88]], %[[VAL_87]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_90:.*]] = builtin.unrealized_conversion_cast %[[VAL_89]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xi32> - -// CHECK-32: %[[VAL_81:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_82:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_81]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_83:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_82]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_84:.*]] = llvm.mlir.constant(0 : index) : i32 -// CHECK-32: %[[VAL_85:.*]] = llvm.insertvalue %[[VAL_84]], %[[VAL_83]][2] : 
!llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_86:.*]] = llvm.mlir.constant(32 : index) : i32 -// CHECK-32: %[[VAL_87:.*]] = llvm.insertvalue %[[VAL_86]], %[[VAL_85]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_88:.*]] = llvm.mlir.constant(1 : index) : i32 -// CHECK-32: %[[VAL_89:.*]] = llvm.insertvalue %[[VAL_88]], %[[VAL_87]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_90:.*]] = builtin.unrealized_conversion_cast %[[VAL_89]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<32xi32> - // CHECK: %[[VAL_91:.*]] = llvm.mlir.constant(32 : i64) : i64 - // CHECK-64: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i64 : (i64) -> !llvm.ptr // CHECK-32: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i32 : (i64) -> !llvm.ptr - -// CHECK-64: %[[VAL_93:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_94:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_93]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_95:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_94]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_96:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK-64: %[[VAL_97:.*]] = llvm.insertvalue %[[VAL_96]], %[[VAL_95]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_98:.*]] = llvm.mlir.constant(32 : index) : i64 -// CHECK-64: %[[VAL_99:.*]] = llvm.insertvalue %[[VAL_98]], %[[VAL_97]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_100:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK-64: %[[VAL_101:.*]] = llvm.insertvalue %[[VAL_100]], %[[VAL_99]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-64: %[[VAL_102:.*]] = builtin.unrealized_conversion_cast %[[VAL_101]] : !llvm.struct<(ptr, 
ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xindex> - -// CHECK-32: %[[VAL_93:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_94:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_93]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_95:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_94]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_96:.*]] = llvm.mlir.constant(0 : index) : i32 -// CHECK-32: %[[VAL_97:.*]] = llvm.insertvalue %[[VAL_96]], %[[VAL_95]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_98:.*]] = llvm.mlir.constant(32 : index) : i32 -// CHECK-32: %[[VAL_99:.*]] = llvm.insertvalue %[[VAL_98]], %[[VAL_97]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_100:.*]] = llvm.mlir.constant(1 : index) : i32 -// CHECK-32: %[[VAL_101:.*]] = llvm.insertvalue %[[VAL_100]], %[[VAL_99]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK-32: %[[VAL_102:.*]] = builtin.unrealized_conversion_cast %[[VAL_101]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<32xindex> - -// CHECK: %[[VAL_103:.*]] = arith.constant 0 : index -// CHECK: memref.store %[[VAL_52]], %[[VAL_68]]{{\[}}%[[VAL_103]]] : memref<32xf32, 3> -// CHECK: memref.store %[[VAL_53]], %[[VAL_78]]{{\[}}%[[VAL_103]]] : memref<16xi16, 3> -// CHECK: memref.store %[[VAL_54]], %[[VAL_90]]{{\[}}%[[VAL_103]]] : memref<32xi32> -// CHECK: memref.store %[[VAL_58]], %[[VAL_102]]{{\[}}%[[VAL_103]]] : memref<32xindex> gpu.func @kernel_with_both_attribs(%arg0: f32, %arg1: i16, %arg2: i32, %arg3: index) - workgroup(%arg4: memref<32xf32, 3>, %arg5: memref<16xi16, 3>) + workgroup(%arg4: memref<8xf32, 3>, %arg5: memref<16xindex, 3>) private(%arg6: memref<32xi32>, %arg7: memref<32xindex>) kernel { - %c0 = arith.constant 0 : index - memref.store %arg0, 
%arg4[%c0] : memref<32xf32, 3> - memref.store %arg1, %arg5[%c0] : memref<16xi16, 3> - memref.store %arg2, %arg6[%c0] : memref<32xi32> - memref.store %arg3, %arg7[%c0] : memref<32xindex> gpu.return } From 36d5bf05ea197d01a5a02616868563241ae8aac5 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Mon, 5 Aug 2024 13:52:46 +0100 Subject: [PATCH 03/10] Use tuple to encode workgroup attribution in LLVM --- .../mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 19 ++++++++ .../mlir/Dialect/LLVMIR/LLVMDialect.td | 6 +-- .../Conversion/GPUCommon/GPUOpsLowering.cpp | 44 +++++-------------- .../lib/Conversion/GPUCommon/GPUOpsLowering.h | 11 +---- .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 10 ++--- mlir/test/Dialect/LLVMIR/func.mlir | 7 +++ 6 files changed, 46 insertions(+), 51 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index 529c458ce12540..892c6e8832b781 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -1104,4 +1104,23 @@ def TailCallKindAttr : LLVM_Attr<"TailCallKind", "tailcallkind"> { let assemblyFormat = "`<` $tailCallKind `>`"; } +//===----------------------------------------------------------------------===// +// WorkgroupAttribAttr +//===----------------------------------------------------------------------===// + +def WorkgroupAttribAttr + : LLVM_Attr<"WorkgroupAttrib", "mlir.workgroup_attrib"> { + let summary = "GPU workgroup attribution information"; + let description = [{ + GPU workgroup attributions are `gpu.func` arguments encoding memory + allocations in the workgroup address space. These might be encoded as + `llvm.ptr` arguments in our dialect, missing type and size information. + This attribute can be use to keep this information when converting from + GPU to LLVM dialect. 
+ }]; + let parameters = (ins "IntegerAttr":$num_elements, + "TypeAttr":$element_type); + let assemblyFormat = "`<` $num_elements `,` $element_type`>`"; +} + #endif // LLVMIR_ATTRDEFS diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td index 8e933afbb02f1c..1bf525e2aeb792 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td @@ -58,9 +58,9 @@ def LLVM_Dialect : Dialect { /// effect when lowering to the LLVMDialect. static StringRef getReadnoneAttrName() { return "llvm.readnone"; } - /// Name of the helper attribute to keep GPU workgroup attribution size - /// information when converting from GPU to LLVM. - static StringRef getWorkgroupAttribSizeAttrName() { return "llvm.mlir.workgroup_attrib_size"; } + /// Name of the helper attribute to keep GPU workgroup attribution size and + /// type information when converting from GPU to LLVM. + static StringRef getWorkgroupAttribAttrName() { return "llvm.mlir.workgroup_attrib"; } /// Verifies if the given string is a well-formed data layout descriptor. /// Uses `reportError` to report errors. 
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index b0d217650ba5f6..68a940a1374363 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -8,7 +8,6 @@ #include "GPUOpsLowering.h" -#include "mlir/Analysis/DataLayoutAnalysis.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Attributes.h" @@ -20,22 +19,6 @@ using namespace mlir; -namespace { -constexpr int64_t sizeQueryFailure = 0; - -static int64_t getAttributionSize(BlockArgument attribution, - const LLVMTypeConverter &converter, - const DataLayout &layout) { - auto attributionType = cast(attribution.getType()); - int64_t numElements = attributionType.getNumElements(); - Type elementType = converter.convertType(attributionType.getElementType()); - if (!elementType) - return sizeQueryFailure; - int64_t elementTypeSize = layout.getTypeSize(elementType); - return numElements * elementTypeSize; -} -} // namespace - LogicalResult GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { @@ -60,24 +43,21 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, std::array attrs{ rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(), rewriter.getUnitAttr()), - rewriter.getNamedAttr( - LLVM::LLVMDialect::getWorkgroupAttribSizeAttrName(), - rewriter.getUnitAttr()), + rewriter.getNamedAttr(LLVM::LLVMDialect::getWorkgroupAttribAttrName(), + rewriter.getUnitAttr()), }; SmallVector argAttrs; - assert(defaultLayout && "Expecting defaultLayout to be intialized"); - const DataLayout *layout = &*defaultLayout; - if (const DataLayoutAnalysis *analysis = - getTypeConverter()->getDataLayoutAnalysis()) { - layout = &analysis->getAbove(gpuFuncOp); - } for (BlockArgument attribution : workgroupAttributions) { - int64_t dataSize = - 
getAttributionSize(attribution, *getTypeConverter(), *layout); - // Check for special failure value - if (dataSize == sizeQueryFailure) + auto attributionType = cast(attribution.getType()); + IntegerAttr numElements = + rewriter.getI64IntegerAttr(attributionType.getNumElements()); + Type llvmElementType = + getTypeConverter()->convertType(attributionType.getElementType()); + if (!llvmElementType) return failure(); - attrs.back().setValue(rewriter.getI64IntegerAttr(dataSize)); + TypeAttr type = TypeAttr::get(llvmElementType); + attrs.back().setValue( + rewriter.getAttr(numElements, type)); argAttrs.push_back(rewriter.getDictionaryAttr(attrs)); } @@ -335,7 +315,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName()); copyPointerAttribute( LLVM::LLVMDialect::getDereferenceableOrNullAttrName()); - copyPointerAttribute(LLVM::LLVMDialect::getWorkgroupAttribSizeAttrName()); + copyPointerAttribute(LLVM::LLVMDialect::getWorkgroupAttribAttrName()); } } rewriter.eraseOp(gpuFuncOp); diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index 0c8213c205269a..781bea6b09406c 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -67,10 +67,7 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { kernelCallingConvention(options.kernelCallingConvention), nonKernelCallingConvention(options.nonKernelCallingConvention), encodeWorkgroupAttributionsAsArguments( - options.encodeWorkgroupAttributionsAsArguments), - defaultLayout(options.encodeWorkgroupAttributionsAsArguments - ? 
std::optional(DataLayout()) - : std::optional()) {} + options.encodeWorkgroupAttributionsAsArguments) {} LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, @@ -95,12 +92,6 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { /// Whether to encode workgroup attributions as additional arguments instead /// of a global variable. bool encodeWorkgroupAttributionsAsArguments; - - /// Default layout to use in absence of the corresponding analysis. - /// This will only be initialized if - /// encodeWorkgroupAttributionsAsArguments=true, as it will remain unused - /// otherwise. - std::optional defaultLayout; }; /// The lowering of gpu.printf to a call to HIP hostcalls diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index f7dfa40b2da714..72b783380bf9bf 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -454,8 +454,8 @@ gpu.module @kernels { // CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgoup_attribs( // CHECK-SAME: %[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: i16, -// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 128 : i64, llvm.noalias}, -// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 32 : i64, llvm.noalias}) attributes {gpu.kernel} { +// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<32 : i64, f32>, llvm.noalias}, +// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i16>, llvm.noalias}) attributes {gpu.kernel} { // MemRef descriptor built from new argument @@ -482,10 +482,8 @@ gpu.module @kernels { // arguments and a llvm.alloca are present. 
// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attribs( -// CHECK-SAME: %[[VAL_52:.*]]: f32, -// CHECK-SAME: %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, -// CHECK-64-SAME: %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 32 : i64, llvm.noalias}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 128 : i64, llvm.noalias}) attributes {gpu.kernel} { -// CHECK-32-SAME %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 32 : i64, llvm.noalias}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib_size = 64 : i64, llvm.noalias}) attributes {gpu.kernel} { +// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<8 : i64, f32>, llvm.noalias}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i64>, llvm.noalias}) attributes {gpu.kernel} { +// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<8 : i64, f32>, llvm.noalias}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i32>, llvm.noalias}) attributes {gpu.kernel} { // CHECK: %[[VAL_79:.*]] = llvm.mlir.constant(32 : i64) : i64 // CHECK: %[[VAL_80:.*]] = llvm.alloca %[[VAL_79]] x i32 : (i64) -> !llvm.ptr diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index 40b4e49f08a3ea..c648ec5880659c 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -472,3 +472,10 @@ llvm.func @reqd_work_group_size_hint() attributes {reqd_work_group_size = array< // CHECK: @intel_reqd_sub_group_size_hint() // CHECK-SAME: intel_reqd_sub_group_size = 32 : i32 llvm.func @intel_reqd_sub_group_size_hint() attributes 
{llvm.intel_reqd_sub_group_size = 32 : i32} + +// ----- + +// CHECK: @workgroup_attrib +// CHECK-SAME: llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<512 : i64, i32> +// CHECK-SAME: llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<128 : i64, !llvm.struct<(i32, i64, f32)> +llvm.func @workgroup_attrib(%arg0: !llvm.ptr {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<512 : i64, i32>}, %arg1: !llvm.ptr {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<128 : i64, !llvm.struct<(i32, i64, f32)>>}) From ed7b600086495cac9f73c1ea0e6b57d2486e82db Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Mon, 5 Aug 2024 14:33:08 +0100 Subject: [PATCH 04/10] Add doc --- mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 68a940a1374363..f186dc792f1b01 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -26,6 +26,9 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, SmallVector workgroupBuffers; if (encodeWorkgroupAttributionsAsArguments) { + // Append an `llvm.ptr` argument to the function signature to encode + // workgroup attributions. + ArrayRef workgroupAttributions = gpuFuncOp.getWorkgroupAttributions(); size_t numAttributions = workgroupAttributions.size(); @@ -177,6 +180,8 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, unsigned numProperArguments = gpuFuncOp.getNumArguments(); if (encodeWorkgroupAttributionsAsArguments) { + // Build a MemRefDescriptor with each of the arguments added above. 
+ unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions(); assert(numProperArguments >= numAttributions && "Expecting attributions to be encoded as arguments already"); From 81bf21c4baa0e03d9254842568dc22ff4c6900af Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Mon, 5 Aug 2024 16:22:18 +0100 Subject: [PATCH 05/10] Use `discardableAttrs` --- mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td | 9 +++++---- mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp | 8 +++++--- mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 8 ++++---- mlir/test/Dialect/LLVMIR/func.mlir | 6 +++--- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td index 1bf525e2aeb792..0f848938b344bf 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td @@ -21,6 +21,11 @@ def LLVM_Dialect : Dialect { let hasRegionResultAttrVerify = 1; let hasOperationAttrVerify = 1; + let discardableAttrs = (ins + /// Attribute encoding size and type of GPU workgroup attributions. + "WorkgroupAttribAttr":$workgroup_attrib + ); + let extraClassDeclaration = [{ /// Name of the data layout attributes. static StringRef getDataLayoutAttrName() { return "llvm.data_layout"; } @@ -58,10 +63,6 @@ def LLVM_Dialect : Dialect { /// effect when lowering to the LLVMDialect. static StringRef getReadnoneAttrName() { return "llvm.readnone"; } - /// Name of the helper attribute to keep GPU workgroup attribution size and - /// type information when converting from GPU to LLVM. - static StringRef getWorkgroupAttribAttrName() { return "llvm.mlir.workgroup_attrib"; } - /// Verifies if the given string is a well-formed data layout descriptor. /// Uses `reportError` to report errors. 
static LogicalResult verifyDataLayoutString( diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index f186dc792f1b01..5cae0d0bba07b9 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -46,8 +46,9 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, std::array attrs{ rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(), rewriter.getUnitAttr()), - rewriter.getNamedAttr(LLVM::LLVMDialect::getWorkgroupAttribAttrName(), - rewriter.getUnitAttr()), + rewriter.getNamedAttr( + getDialect().getWorkgroupAttribAttrHelper().getName(), + rewriter.getUnitAttr()), }; SmallVector argAttrs; for (BlockArgument attribution : workgroupAttributions) { @@ -320,7 +321,8 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName()); copyPointerAttribute( LLVM::LLVMDialect::getDereferenceableOrNullAttrName()); - copyPointerAttribute(LLVM::LLVMDialect::getWorkgroupAttribAttrName()); + copyPointerAttribute( + LLVM::LLVMDialect::WorkgroupAttribAttrHelper::getNameStr()); } } rewriter.eraseOp(gpuFuncOp); diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index 72b783380bf9bf..7e9675b284578f 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -454,8 +454,8 @@ gpu.module @kernels { // CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgoup_attribs( // CHECK-SAME: %[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: i16, -// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<32 : i64, f32>, llvm.noalias}, -// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i16>, llvm.noalias}) attributes 
{gpu.kernel} { +// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<32 : i64, f32>}, +// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i16>}) attributes {gpu.kernel} { // MemRef descriptor built from new argument @@ -482,8 +482,8 @@ gpu.module @kernels { // arguments and a llvm.alloca are present. // CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attribs( -// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<8 : i64, f32>, llvm.noalias}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i64>, llvm.noalias}) attributes {gpu.kernel} { -// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<8 : i64, f32>, llvm.noalias}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i32>, llvm.noalias}) attributes {gpu.kernel} { +// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<8 : i64, f32>}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i64>}) attributes {gpu.kernel} { +// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<8 : i64, f32>}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i32>}) attributes {gpu.kernel} { // CHECK: %[[VAL_79:.*]] = llvm.mlir.constant(32 : i64) : i64 // 
CHECK: %[[VAL_80:.*]] = llvm.alloca %[[VAL_79]] x i32 : (i64) -> !llvm.ptr diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index c648ec5880659c..aaac62befe0147 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -476,6 +476,6 @@ llvm.func @intel_reqd_sub_group_size_hint() attributes {llvm.intel_reqd_sub_grou // ----- // CHECK: @workgroup_attrib -// CHECK-SAME: llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<512 : i64, i32> -// CHECK-SAME: llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<128 : i64, !llvm.struct<(i32, i64, f32)> -llvm.func @workgroup_attrib(%arg0: !llvm.ptr {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<512 : i64, i32>}, %arg1: !llvm.ptr {llvm.mlir.workgroup_attrib = #llvm.mlir.workgroup_attrib<128 : i64, !llvm.struct<(i32, i64, f32)>>}) +// CHECK-SAME: llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<512 : i64, i32> +// CHECK-SAME: llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<128 : i64, !llvm.struct<(i32, i64, f32)> +llvm.func @workgroup_attrib(%arg0: !llvm.ptr {llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<512 : i64, i32>}, %arg1: !llvm.ptr {llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<128 : i64, !llvm.struct<(i32, i64, f32)>>}) From 361c336cef3dafcb503fde6f775d502a0eec112c Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Tue, 6 Aug 2024 17:56:04 +0100 Subject: [PATCH 06/10] `attrib->attribution` --- .../mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 12 +++++------ .../mlir/Dialect/LLVMIR/LLVMDialect.td | 2 +- .../Conversion/GPUCommon/GPUOpsLowering.cpp | 8 ++++---- .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 20 +++++++++---------- mlir/test/Dialect/LLVMIR/func.mlir | 8 ++++---- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index 892c6e8832b781..c6d6261936e555 100644 --- 
a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -1105,18 +1105,18 @@ def TailCallKindAttr : LLVM_Attr<"TailCallKind", "tailcallkind"> { } //===----------------------------------------------------------------------===// -// WorkgroupAttribAttr +// WorkgroupAttributionAttr //===----------------------------------------------------------------------===// -def WorkgroupAttribAttr - : LLVM_Attr<"WorkgroupAttrib", "mlir.workgroup_attrib"> { +def WorkgroupAttributionAttr + : LLVM_Attr<"WorkgroupAttribution", "mlir.workgroup_attribution"> { let summary = "GPU workgroup attribution information"; let description = [{ GPU workgroup attributions are `gpu.func` arguments encoding memory allocations in the workgroup address space. These might be encoded as - `llvm.ptr` arguments in our dialect, missing type and size information. - This attribute can be use to keep this information when converting from - GPU to LLVM dialect. + `llvm.ptr` function arguments in our dialect, dropping type and size + information. This attribute can be attached to function arguments to keep + this information when converting from GPU to LLVM dialect. }]; let parameters = (ins "IntegerAttr":$num_elements, "TypeAttr":$element_type); diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td index 0f848938b344bf..7dc5d0522910e3 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td @@ -23,7 +23,7 @@ def LLVM_Dialect : Dialect { let discardableAttrs = (ins /// Attribute encoding size and type of GPU workgroup attributions. 
- "WorkgroupAttribAttr":$workgroup_attrib + "WorkgroupAttributionAttr":$workgroup_attribution ); let extraClassDeclaration = [{ diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 5cae0d0bba07b9..12ce5b5a5f4ad9 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -42,12 +42,12 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, rewriter.getType(workgroupAddrSpace); SmallVector argTypes(numAttributions, workgroupPtrType); - // Attributes: noalias, llvm.mlir.workgroup_attrib_size() + // Attributes: noalias, llvm.mlir.workgroup_attribution(, ) std::array attrs{ rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(), rewriter.getUnitAttr()), rewriter.getNamedAttr( - getDialect().getWorkgroupAttribAttrHelper().getName(), + getDialect().getWorkgroupAttributionAttrHelper().getName(), rewriter.getUnitAttr()), }; SmallVector argAttrs; @@ -61,7 +61,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, return failure(); TypeAttr type = TypeAttr::get(llvmElementType); attrs.back().setValue( - rewriter.getAttr(numElements, type)); + rewriter.getAttr(numElements, type)); argAttrs.push_back(rewriter.getDictionaryAttr(attrs)); } @@ -322,7 +322,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, copyPointerAttribute( LLVM::LLVMDialect::getDereferenceableOrNullAttrName()); copyPointerAttribute( - LLVM::LLVMDialect::WorkgroupAttribAttrHelper::getNameStr()); + LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr()); } } rewriter.eraseOp(gpuFuncOp); diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index 7e9675b284578f..2dae2957fbe652 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -418,7 
+418,7 @@ gpu.module @kernels { // ----- gpu.module @kernels { -// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_private_attribs( +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_private_attributions( // CHECK-SAME: %[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: i16) attributes {gpu.kernel} { // Private attribution is converted to an llvm.alloca @@ -444,7 +444,7 @@ gpu.module @kernels { // CHECK: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] // CHECK: llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] - gpu.func @kernel_with_private_attribs(%arg0: f32, %arg1: i16) + gpu.func @kernel_with_private_attributions(%arg0: f32, %arg1: i16) private(%arg2: memref<32xf32>, %arg3: memref<16xi16>) kernel { gpu.return @@ -452,10 +452,10 @@ gpu.module @kernels { // Workgroup attributions are converted to an llvm.ptr<3> argument -// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgoup_attribs( +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgoup_attributions( // CHECK-SAME: %[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: i16, -// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<32 : i64, f32>}, -// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i16>}) attributes {gpu.kernel} { +// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<32 : i64, f32>}, +// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i16>}) attributes {gpu.kernel} { // MemRef descriptor built from new argument @@ -472,7 +472,7 @@ gpu.module @kernels { // CHECK: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] // CHECK: llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] - gpu.func @kernel_with_workgoup_attribs(%arg0: f32, %arg1: i16) + gpu.func @kernel_with_workgoup_attributions(%arg0: f32, %arg1: i16) workgroup(%arg2: 
memref<32xf32, 3>, %arg3: memref<16xi16, 3>) kernel { gpu.return @@ -481,9 +481,9 @@ gpu.module @kernels { // Check with both private and workgroup attributions. Simply check additional // arguments and a llvm.alloca are present. -// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attribs( -// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<8 : i64, f32>}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i64>}) attributes {gpu.kernel} { -// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<8 : i64, f32>}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<16 : i64, i32>}) attributes {gpu.kernel} { +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attributions( +// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<8 : i64, f32>}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i64>}) attributes {gpu.kernel} { +// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<8 : i64, f32>}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i32>}) attributes {gpu.kernel} { // CHECK: %[[VAL_79:.*]] = llvm.mlir.constant(32 : i64) : i64 // CHECK: %[[VAL_80:.*]] = llvm.alloca %[[VAL_79]] x i32 : (i64) -> !llvm.ptr @@ -491,7 +491,7 @@ gpu.module 
@kernels { // CHECK: %[[VAL_91:.*]] = llvm.mlir.constant(32 : i64) : i64 // CHECK-64: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i64 : (i64) -> !llvm.ptr // CHECK-32: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i32 : (i64) -> !llvm.ptr - gpu.func @kernel_with_both_attribs(%arg0: f32, %arg1: i16, %arg2: i32, %arg3: index) + gpu.func @kernel_with_both_attributions(%arg0: f32, %arg1: i16, %arg2: i32, %arg3: index) workgroup(%arg4: memref<8xf32, 3>, %arg5: memref<16xindex, 3>) private(%arg6: memref<32xi32>, %arg7: memref<32xindex>) kernel { diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index aaac62befe0147..e2a444c1faaba1 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -475,7 +475,7 @@ llvm.func @intel_reqd_sub_group_size_hint() attributes {llvm.intel_reqd_sub_grou // ----- -// CHECK: @workgroup_attrib -// CHECK-SAME: llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<512 : i64, i32> -// CHECK-SAME: llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<128 : i64, !llvm.struct<(i32, i64, f32)> -llvm.func @workgroup_attrib(%arg0: !llvm.ptr {llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<512 : i64, i32>}, %arg1: !llvm.ptr {llvm.workgroup_attrib = #llvm.mlir.workgroup_attrib<128 : i64, !llvm.struct<(i32, i64, f32)>>}) +// CHECK: @workgroup_attribution +// CHECK-SAME: llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<512 : i64, i32> +// CHECK-SAME: llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<128 : i64, !llvm.struct<(i32, i64, f32)> +llvm.func @workgroup_attribution(%arg0: !llvm.ptr {llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<512 : i64, i32>}, %arg1: !llvm.ptr {llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<128 : i64, !llvm.struct<(i32, i64, f32)>>}) From bf25aecc7e579a7d732dce1d5046441a76ea8421 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Tue, 6 Aug 2024 17:59:19 +0100 Subject: [PATCH 07/10] Change doc --- 
mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index c6d6261936e555..6df082814364fa 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -1112,11 +1112,13 @@ def WorkgroupAttributionAttr : LLVM_Attr<"WorkgroupAttribution", "mlir.workgroup_attribution"> { let summary = "GPU workgroup attribution information"; let description = [{ - GPU workgroup attributions are `gpu.func` arguments encoding memory + GPU workgroup attributions are `gpu.func` attributes encoding memory allocations in the workgroup address space. These might be encoded as - `llvm.ptr` function arguments in our dialect, dropping type and size - information. This attribute can be attached to function arguments to keep - this information when converting from GPU to LLVM dialect. + `llvm.ptr` function arguments in our dialect, but then type and size + information would be dropped. This attribute can be attached to `llvm.ptr` + function arguments encoding GPU workgroup attributions to mark them as + arguments encoding workgroup attributions and keeping type and size + information in our dialect. 
}]; let parameters = (ins "IntegerAttr":$num_elements, "TypeAttr":$element_type); From 3981cc80c75582da96a923b2e0558c96e2b5596f Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Wed, 7 Aug 2024 08:48:06 +0100 Subject: [PATCH 08/10] Address comments --- mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 2 +- mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp | 7 +++---- mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h | 8 ++++---- mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index 6df082814364fa..5d96f506342588 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -1122,7 +1122,7 @@ def WorkgroupAttributionAttr }]; let parameters = (ins "IntegerAttr":$num_elements, "TypeAttr":$element_type); - let assemblyFormat = "`<` $num_elements `,` $element_type`>`"; + let assemblyFormat = "`<` $num_elements `,` $element_type `>`"; } #endif // LLVMIR_ATTRDEFS diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 12ce5b5a5f4ad9..5b590a457f7714 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -153,11 +153,10 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, // latter is expected by gpu.launch_func. if (gpuFuncOp.isKernel()) { if (kernelAttributeName) - attributes.emplace_back(*kernelAttributeName, rewriter.getUnitAttr()); + attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr()); // Set the dialect-specific block size attribute if there is one. 
- if (kernelBlockSizeAttributeName.has_value() && knownBlockSize) { - attributes.emplace_back(kernelBlockSizeAttributeName.value(), - knownBlockSize); + if (kernelBlockSizeAttributeName && knownBlockSize) { + attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize); } } LLVM::CConv callingConvention = gpuFuncOp.isKernel() diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index 781bea6b09406c..307b72b4b1f14f 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -42,9 +42,9 @@ struct GPUFuncOpLoweringOptions { unsigned workgroupAddrSpace; /// The attribute name to use instead of `gpu.kernel`. - std::optional kernelAttributeName = std::nullopt; + StringAttr kernelAttributeName; /// The attribute name to to set block size - std::optional kernelBlockSizeAttributeName = std::nullopt; + StringAttr kernelBlockSizeAttributeName; /// The calling convention to use for kernel functions LLVM::CConv kernelCallingConvention = LLVM::CConv::C; @@ -80,9 +80,9 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { unsigned workgroupAddrSpace; /// The attribute name to use instead of `gpu.kernel`. 
- std::optional kernelAttributeName; + StringAttr kernelAttributeName; /// The attribute name to to set block size - std::optional kernelBlockSizeAttributeName; + StringAttr kernelBlockSizeAttributeName; /// The calling convention to use for kernel functions LLVM::CConv kernelCallingConvention; diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp index 74dd5f19c20f5e..36e4a6a38a68e4 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp +++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp @@ -362,7 +362,7 @@ void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter, typeConverter, GPUFuncOpLoweringOptions{ privateAddressSpace, localAddressSpace, - /*kernelAttributeName=*/std::nullopt, kernelBlockSizeAttributeName, + /*kernelAttributeName=*/{}, kernelBlockSizeAttributeName, LLVM::CConv::SPIR_KERNEL, LLVM::CConv::SPIR_FUNC, /*encodeWorkgroupAttributionsAsArguments=*/true}); } From af5955af3c60e86ca1460b6151667785218c6c11 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Wed, 7 Aug 2024 08:51:57 +0100 Subject: [PATCH 09/10] Improve doc --- mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index 307b72b4b1f14f..444a07a93ca36e 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -41,14 +41,16 @@ struct GPUFuncOpLoweringOptions { /// The address space to use declaring workgroup memory. unsigned workgroupAddrSpace; - /// The attribute name to use instead of `gpu.kernel`. + /// The attribute name to use instead of `gpu.kernel`. Null if no attribute + /// should be used. StringAttr kernelAttributeName; - /// The attribute name to to set block size + /// The attribute name to set block size. Null if no attribute should be + /// used. 
StringAttr kernelBlockSizeAttributeName; - /// The calling convention to use for kernel functions + /// The calling convention to use for kernel functions. LLVM::CConv kernelCallingConvention = LLVM::CConv::C; - /// The calling convention to use for non-kernel functions + /// The calling convention to use for non-kernel functions. LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C; /// Whether to encode workgroup attributions as additional arguments instead @@ -79,9 +81,11 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { /// The address space to use declaring workgroup memory. unsigned workgroupAddrSpace; - /// The attribute name to use instead of `gpu.kernel`. + /// The attribute name to use instead of `gpu.kernel`. Null if no attribute + /// should be used. StringAttr kernelAttributeName; - /// The attribute name to to set block size + /// The attribute name to set block size. Null if no attribute should be + /// used. StringAttr kernelBlockSizeAttributeName; /// The calling convention to use for kernel functions From 512724f4db478ebf14c9712cfca794ab6bfaa270 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Fri, 9 Aug 2024 10:58:40 +0100 Subject: [PATCH 10/10] Format tests --- .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 95 +++++++++---------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index 2dae2957fbe652..8e133288b832b6 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -381,35 +381,35 @@ gpu.module @shuffles_mismatch { // ----- gpu.module @kernels { -// CHECK: llvm.func spir_funccc @no_kernel() { + // CHECK: llvm.func spir_funccc @no_kernel() { gpu.func @no_kernel() { gpu.return } -// CHECK: llvm.func spir_kernelcc @kernel_no_arg() attributes {gpu.kernel} { + // CHECK: llvm.func spir_kernelcc @kernel_no_arg() attributes {gpu.kernel} { 
gpu.func @kernel_no_arg() kernel { gpu.return } -// CHECK: llvm.func spir_kernelcc @kernel_with_args(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: i64) attributes {gpu.kernel} { + // CHECK: llvm.func spir_kernelcc @kernel_with_args(%{{.*}}: f32, %{{.*}}: i64) attributes {gpu.kernel} { gpu.func @kernel_with_args(%arg0: f32, %arg1: i64) kernel { gpu.return } -// CHECK-64: llvm.func spir_kernelcc @kernel_with_conv_args(%[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: i64) attributes {gpu.kernel} { -// CHECK-32: llvm.func spir_kernelcc @kernel_with_conv_args(%[[VAL_2:.*]]: i32, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: i32) attributes {gpu.kernel} { + // CHECK-64: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i64, %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64) attributes {gpu.kernel} { + // CHECK-32: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i32, %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32) attributes {gpu.kernel} { gpu.func @kernel_with_conv_args(%arg0: index, %arg1: memref) kernel { gpu.return } -// CHECK-64: llvm.func spir_kernelcc @kernel_with_sized_memref(%[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr, %[[VAL_8:.*]]: i64, %[[VAL_9:.*]]: i64, %[[VAL_10:.*]]: i64) attributes {gpu.kernel} { -// CHECK-32: llvm.func spir_kernelcc @kernel_with_sized_memref(%[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr, %[[VAL_8:.*]]: i32, %[[VAL_9:.*]]: i32, %[[VAL_10:.*]]: i32) attributes {gpu.kernel} { + // CHECK-64: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} { + // CHECK-32: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} { gpu.func @kernel_with_sized_memref(%arg0: memref<1xindex>) kernel { gpu.return } -// CHECK-64: llvm.func spir_kernelcc 
@kernel_with_ND_memref(%[[VAL_11:.*]]: !llvm.ptr, %[[VAL_12:.*]]: !llvm.ptr, %[[VAL_13:.*]]: i64, %[[VAL_14:.*]]: i64, %[[VAL_15:.*]]: i64, %[[VAL_16:.*]]: i64, %[[VAL_17:.*]]: i64, %[[VAL_18:.*]]: i64, %[[VAL_19:.*]]: i64) attributes {gpu.kernel} { -// CHECK-32: llvm.func spir_kernelcc @kernel_with_ND_memref(%[[VAL_11:.*]]: !llvm.ptr, %[[VAL_12:.*]]: !llvm.ptr, %[[VAL_13:.*]]: i32, %[[VAL_14:.*]]: i32, %[[VAL_15:.*]]: i32, %[[VAL_16:.*]]: i32, %[[VAL_17:.*]]: i32, %[[VAL_18:.*]]: i32, %[[VAL_19:.*]]: i32) attributes {gpu.kernel} { + // CHECK-64: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} { + // CHECK-32: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} { gpu.func @kernel_with_ND_memref(%arg0: memref<128x128x128xindex>) kernel { gpu.return } @@ -418,33 +418,32 @@ gpu.module @kernels { // ----- gpu.module @kernels { -// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_private_attributions( -// CHECK-SAME: %[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: i16) attributes {gpu.kernel} { +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_private_attributions() attributes {gpu.kernel} { // Private attribution is converted to an llvm.alloca -// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(32 : i64) : i64 -// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x f32 : (i64) -> !llvm.ptr +// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(32 : i64) : i64 +// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x f32 : (i64) -> !llvm.ptr // MemRef descriptor built from allocated pointer -// CHECK-64: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-32: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, 
array<1 x i32>)> +// CHECK-64: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-32: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] -// CHECK: llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] +// CHECK: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] +// CHECK: llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] // Same code as above -// CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(16 : i64) : i64 -// CHECK: %[[VAL_15:.*]] = llvm.alloca %[[VAL_14]] x i16 : (i64) -> !llvm.ptr +// CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(16 : i64) : i64 +// CHECK: %[[VAL_15:.*]] = llvm.alloca %[[VAL_14]] x i16 : (i64) -> !llvm.ptr -// CHECK-64: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-32: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-64: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-32: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> -// CHECK: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] -// CHECK: llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] - gpu.func @kernel_with_private_attributions(%arg0: f32, %arg1: i16) +// CHECK: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] +// CHECK: llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] + gpu.func @kernel_with_private_attributions() private(%arg2: memref<32xf32>, %arg3: memref<16xi16>) kernel { gpu.return @@ -452,27 +451,26 @@ gpu.module @kernels { // Workgroup attributions are converted to an llvm.ptr<3> argument -// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgoup_attributions( -// CHECK-SAME: %[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: i16, -// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.noalias, 
llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<32 : i64, f32>}, -// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i16>}) attributes {gpu.kernel} { +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgoup_attributions( +// CHECK-SAME: %[[VAL_29:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<32 : i64, f32>}, +// CHECK-SAME: %[[VAL_30:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i16>}) attributes {gpu.kernel} { // MemRef descriptor built from new argument -// CHECK-64: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-32: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-64: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-32: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] -// CHECK: llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] +// CHECK: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] +// CHECK: llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] // Same as above -// CHECK-64: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> -// CHECK-32: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> +// CHECK-64: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-32: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> -// CHECK: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] -// CHECK: llvm.insertvalue %[[VAL_30]], 
%[[VAL_42]][1] - gpu.func @kernel_with_workgoup_attributions(%arg0: f32, %arg1: i16) +// CHECK: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] +// CHECK: llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] + gpu.func @kernel_with_workgoup_attributions() workgroup(%arg2: memref<32xf32, 3>, %arg3: memref<16xi16, 3>) kernel { gpu.return @@ -481,25 +479,26 @@ gpu.module @kernels { // Check with both private and workgroup attributions. Simply check additional // arguments and a llvm.alloca are present. -// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attributions( -// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<8 : i64, f32>}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i64>}) attributes {gpu.kernel} { -// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<8 : i64, f32>}, %[[VAL_57:.*]]: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i32>}) attributes {gpu.kernel} { +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attributions( +// CHECK-SAME: %{{.*}}: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<8 : i64, f32>}, +// CHECK-64-SAME: %{{.*}}: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i64>}) attributes {gpu.kernel} { +// CHECK-32-SAME: %{{.*}}: !llvm.ptr<3> {llvm.noalias, llvm.workgroup_attribution = #llvm.mlir.workgroup_attribution<16 : i64, i32>}) attributes {gpu.kernel} { -// CHECK: %[[VAL_79:.*]] = llvm.mlir.constant(32 : i64) : i64 -// CHECK: %[[VAL_80:.*]] = llvm.alloca %[[VAL_79]] x i32 : (i64) -> !llvm.ptr 
+// CHECK: %[[VAL_79:.*]] = llvm.mlir.constant(32 : i64) : i64 +// CHECK: %[[VAL_80:.*]] = llvm.alloca %[[VAL_79]] x i32 : (i64) -> !llvm.ptr -// CHECK: %[[VAL_91:.*]] = llvm.mlir.constant(32 : i64) : i64 -// CHECK-64: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i64 : (i64) -> !llvm.ptr -// CHECK-32: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i32 : (i64) -> !llvm.ptr - gpu.func @kernel_with_both_attributions(%arg0: f32, %arg1: i16, %arg2: i32, %arg3: index) +// CHECK: %[[VAL_91:.*]] = llvm.mlir.constant(32 : i64) : i64 +// CHECK-64: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i64 : (i64) -> !llvm.ptr +// CHECK-32: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i32 : (i64) -> !llvm.ptr + gpu.func @kernel_with_both_attributions() workgroup(%arg4: memref<8xf32, 3>, %arg5: memref<16xindex, 3>) private(%arg6: memref<32xi32>, %arg7: memref<32xindex>) kernel { gpu.return } -// CHECK-LABEL: llvm.func spir_kernelcc @kernel_known_block_size -// CHECK-SAME: reqd_work_group_size = array +// CHECK-LABEL: llvm.func spir_kernelcc @kernel_known_block_size +// CHECK-SAME: reqd_work_group_size = array gpu.func @kernel_known_block_size() kernel attributes {known_block_size = array} { gpu.return }