[MLIR][GPU-LLVM] Convert gpu.func to llvm.func #101664

Merged · 10 commits · Aug 9, 2024
Changes from all commits
18 changes: 18 additions & 0 deletions mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h
@@ -0,0 +1,18 @@
//===- AttrToLLVMConverter.h - SPIR-V attributes conversion to LLVM - C++ -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_
#define MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_

#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"

namespace mlir {
unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI,
spirv::StorageClass storageClass);
} // namespace mlir

#endif // MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_
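A minimal usage sketch of the new helper (not part of this diff); the enum values come from the SPIR-V dialect, but the wrapper function and its call site are assumptions:

// Sketch: query the LLVM address space for SPIR-V Workgroup storage.
#include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h"

static unsigned getWorkgroupAddressSpace(mlir::spirv::ClientAPI clientAPI) {
  // With the OpenCL client API this is expected to yield the local/shared
  // memory address space; the exact number is defined by the converter.
  return mlir::storageClassToAddressSpace(clientAPI,
                                          mlir::spirv::StorageClass::Workgroup);
}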
21 changes: 21 additions & 0 deletions mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -1104,4 +1104,25 @@ def TailCallKindAttr : LLVM_Attr<"TailCallKind", "tailcallkind"> {
let assemblyFormat = "`<` $tailCallKind `>`";
}

//===----------------------------------------------------------------------===//
// WorkgroupAttributionAttr
//===----------------------------------------------------------------------===//

def WorkgroupAttributionAttr
: LLVM_Attr<"WorkgroupAttribution", "mlir.workgroup_attribution"> {
let summary = "GPU workgroup attribution information";
let description = [{
GPU workgroup attributions are `gpu.func` attributes encoding memory
allocations in the workgroup address space. These might be encoded as
`llvm.ptr` function arguments in our dialect, but then type and size
information would be dropped. This attribute can be attached to `llvm.ptr`
function arguments that encode GPU workgroup attributions, marking them as
such while keeping the attribution's type and size information in our
dialect.
}];
let parameters = (ins "IntegerAttr":$num_elements,
"TypeAttr":$element_type);
let assemblyFormat = "`<` $num_elements `,` $element_type `>`";
}

#endif // LLVMIR_ATTRDEFS
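As a rough illustration (not part of this patch), the generated attribute could be built and attached to an `llvm.func` argument along these lines; `builder`, `funcOp`, `argIdx`, `numElements`, and `elementType` are assumed placeholders:

// Hypothetical sketch: tag argument `argIdx` as a workgroup attribution of
// `numElements` elements of type `elementType`.
auto wgAttr = LLVM::WorkgroupAttributionAttr::get(
    builder.getContext(), builder.getI64IntegerAttr(numElements),
    TypeAttr::get(elementType));
funcOp.setArgAttr(
    argIdx, LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr(),
    wgAttr);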
5 changes: 5 additions & 0 deletions mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.td
@@ -21,6 +21,11 @@ def LLVM_Dialect : Dialect {
let hasRegionResultAttrVerify = 1;
let hasOperationAttrVerify = 1;

let discardableAttrs = (ins
/// Attribute encoding size and type of GPU workgroup attributions.
"WorkgroupAttributionAttr":$workgroup_attribution
);

let extraClassDeclaration = [{
/// Name of the data layout attributes.
static StringRef getDataLayoutAttrName() { return "llvm.data_layout"; }
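The `discardableAttrs` entry makes ODS generate an attribute helper on the LLVM dialect. A hedged sketch of how that helper is reached, mirroring its uses later in this patch; the `llvmDialect` variable is an assumption:

// Static name lookup, as used when copying the attribute between arguments.
StringRef attrName =
    LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr();

// Instance-based lookup, as used when building the new argument attributes.
auto attrNameAttr =
    llvmDialect->getWorkgroupAttributionAttrHelper().getName();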
1 change: 1 addition & 0 deletions mlir/lib/Conversion/CMakeLists.txt
@@ -53,6 +53,7 @@ add_subdirectory(SCFToGPU)
add_subdirectory(SCFToOpenMP)
add_subdirectory(SCFToSPIRV)
add_subdirectory(ShapeToStandard)
add_subdirectory(SPIRVCommon)
add_subdirectory(SPIRVToLLVM)
add_subdirectory(TensorToLinalg)
add_subdirectory(TensorToSPIRV)
175 changes: 129 additions & 46 deletions mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -25,29 +25,80 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
Location loc = gpuFuncOp.getLoc();

SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
for (const auto [idx, attribution] :
llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
auto type = dyn_cast<MemRefType>(attribution.getType());
assert(type && type.hasStaticShape() && "unexpected type in attribution");

uint64_t numElements = type.getNumElements();

auto elementType =
cast<Type>(typeConverter->convertType(type.getElementType()));
auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
std::string name =
std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
uint64_t alignment = 0;
if (auto alignAttr =
dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getWorkgroupAttributionAttr(
idx, LLVM::LLVMDialect::getAlignAttrName())))
alignment = alignAttr.getInt();
auto globalOp = rewriter.create<LLVM::GlobalOp>(
gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
workgroupAddrSpace);
workgroupBuffers.push_back(globalOp);
if (encodeWorkgroupAttributionsAsArguments) {
// Append one `llvm.ptr` argument per workgroup attribution to the function
// signature.

ArrayRef<BlockArgument> workgroupAttributions =
gpuFuncOp.getWorkgroupAttributions();
size_t numAttributions = workgroupAttributions.size();

// Insert all arguments at the end.
unsigned index = gpuFuncOp.getNumArguments();
SmallVector<unsigned> argIndices(numAttributions, index);

// New arguments will simply be `llvm.ptr` with the correct address space
Type workgroupPtrType =
rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
SmallVector<Type> argTypes(numAttributions, workgroupPtrType);

// Attributes: noalias, llvm.mlir.workgroup_attribution(<size>, <type>)
std::array attrs{
rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
rewriter.getUnitAttr()),
rewriter.getNamedAttr(
getDialect().getWorkgroupAttributionAttrHelper().getName(),
rewriter.getUnitAttr()),
};
SmallVector<DictionaryAttr> argAttrs;
for (BlockArgument attribution : workgroupAttributions) {
auto attributionType = cast<MemRefType>(attribution.getType());
IntegerAttr numElements =
rewriter.getI64IntegerAttr(attributionType.getNumElements());
Type llvmElementType =
getTypeConverter()->convertType(attributionType.getElementType());
if (!llvmElementType)
return failure();
TypeAttr type = TypeAttr::get(llvmElementType);
attrs.back().setValue(
rewriter.getAttr<LLVM::WorkgroupAttributionAttr>(numElements, type));
argAttrs.push_back(rewriter.getDictionaryAttr(attrs));
}

// Locations match the function location.
SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());

// Perform signature modification
rewriter.modifyOpInPlace(
gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
argIndices, argTypes, argAttrs, argLocs);
});
} else {
workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
for (auto [idx, attribution] :
llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
auto type = dyn_cast<MemRefType>(attribution.getType());
assert(type && type.hasStaticShape() && "unexpected type in attribution");

uint64_t numElements = type.getNumElements();

auto elementType =
cast<Type>(typeConverter->convertType(type.getElementType()));
auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
std::string name =
std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
uint64_t alignment = 0;
if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
gpuFuncOp.getWorkgroupAttributionAttr(
idx, LLVM::LLVMDialect::getAlignAttrName())))
alignment = alignAttr.getInt();
auto globalOp = rewriter.create<LLVM::GlobalOp>(
gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
workgroupAddrSpace);
workgroupBuffers.push_back(globalOp);
}
}

// Remap proper input types.
@@ -101,16 +152,19 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
// attribute. The former is necessary for further translation while the
// latter is expected by gpu.launch_func.
if (gpuFuncOp.isKernel()) {
attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
if (kernelAttributeName)
attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
// Set the dialect-specific block size attribute if there is one.
if (kernelBlockSizeAttributeName.has_value() && knownBlockSize) {
attributes.emplace_back(kernelBlockSizeAttributeName.value(),
knownBlockSize);
if (kernelBlockSizeAttributeName && knownBlockSize) {
attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
}
}
LLVM::CConv callingConvention = gpuFuncOp.isKernel()
? kernelCallingConvention
: nonKernelCallingConvention;
auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
LLVM::Linkage::External, /*dsoLocal=*/false, /*cconv=*/LLVM::CConv::C,
LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
/*comdat=*/nullptr, attributes);

{
@@ -125,24 +179,51 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
rewriter.setInsertionPointToStart(&gpuFuncOp.front());
unsigned numProperArguments = gpuFuncOp.getNumArguments();

for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
global.getAddrSpace());
Value address = rewriter.create<LLVM::AddressOfOp>(
loc, ptrType, global.getSymNameAttr());
Value memory =
rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(), address,
ArrayRef<LLVM::GEPArg>{0, 0});

// Build a memref descriptor pointing to the buffer to plug with the
// existing memref infrastructure. This may use more registers than
// otherwise necessary given that memref sizes are fixed, but we can try
// and canonicalize that away later.
Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
auto type = cast<MemRefType>(attribution.getType());
auto descr = MemRefDescriptor::fromStaticShape(
rewriter, loc, *getTypeConverter(), type, memory);
signatureConversion.remapInput(numProperArguments + idx, descr);
if (encodeWorkgroupAttributionsAsArguments) {
// Build a MemRefDescriptor with each of the arguments added above.

unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
assert(numProperArguments >= numAttributions &&
"Expecting attributions to be encoded as arguments already");

// Arguments encoding workgroup attributions will be in positions
// [numProperArguments, numProperArguments+numAttributions)
ArrayRef<BlockArgument> attributionArguments =
gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
numAttributions);
for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
auto [attribution, arg] = vals;
auto type = cast<MemRefType>(attribution.getType());

// Arguments are of llvm.ptr type and attributions are of memref type:
// we need to wrap them in memref descriptors.
Value descr = MemRefDescriptor::fromStaticShape(
rewriter, loc, *getTypeConverter(), type, arg);

// And remap the arguments
signatureConversion.remapInput(numProperArguments + idx, descr);
}
} else {
for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
global.getAddrSpace());
Value address = rewriter.create<LLVM::AddressOfOp>(
loc, ptrType, global.getSymNameAttr());
Value memory =
rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(),
address, ArrayRef<LLVM::GEPArg>{0, 0});

// Build a memref descriptor pointing to the buffer to plug with the
// existing memref infrastructure. This may use more registers than
// otherwise necessary given that memref sizes are fixed, but we can try
// and canonicalize that away later.
Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
auto type = cast<MemRefType>(attribution.getType());
auto descr = MemRefDescriptor::fromStaticShape(
rewriter, loc, *getTypeConverter(), type, memory);
signatureConversion.remapInput(numProperArguments + idx, descr);
}
}

// Rewrite private memory attributions to alloca'ed buffers.
@@ -239,6 +320,8 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
copyPointerAttribute(
LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
copyPointerAttribute(
LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
}
}
rewriter.eraseOp(gpuFuncOp);
59 changes: 47 additions & 12 deletions mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
@@ -35,16 +35,41 @@ struct GPUDynamicSharedMemoryOpLowering
unsigned alignmentBit;
};

struct GPUFuncOpLoweringOptions {
/// The address space to use for `alloca`s in private memory.
unsigned allocaAddrSpace;
/// The address space to use for declaring workgroup memory.
unsigned workgroupAddrSpace;

/// The attribute name to use instead of `gpu.kernel`. Null if no attribute
/// should be used.
StringAttr kernelAttributeName;
/// The attribute name to set the block size. Null if no attribute should be
/// used.
StringAttr kernelBlockSizeAttributeName;

/// The calling convention to use for kernel functions.
LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
/// The calling convention to use for non-kernel functions.
LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

/// Whether to encode workgroup attributions as additional arguments instead
/// of a global variable.
bool encodeWorkgroupAttributionsAsArguments = false;
};

struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
GPUFuncOpLowering(
const LLVMTypeConverter &converter, unsigned allocaAddrSpace,
unsigned workgroupAddrSpace, StringAttr kernelAttributeName,
std::optional<StringAttr> kernelBlockSizeAttributeName = std::nullopt)
GPUFuncOpLowering(const LLVMTypeConverter &converter,
const GPUFuncOpLoweringOptions &options)
: ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter),
allocaAddrSpace(allocaAddrSpace),
workgroupAddrSpace(workgroupAddrSpace),
kernelAttributeName(kernelAttributeName),
kernelBlockSizeAttributeName(kernelBlockSizeAttributeName) {}
allocaAddrSpace(options.allocaAddrSpace),
workgroupAddrSpace(options.workgroupAddrSpace),
kernelAttributeName(options.kernelAttributeName),
kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
kernelCallingConvention(options.kernelCallingConvention),
nonKernelCallingConvention(options.nonKernelCallingConvention),
encodeWorkgroupAttributionsAsArguments(
options.encodeWorkgroupAttributionsAsArguments) {}

LogicalResult
matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
@@ -56,11 +81,21 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
/// The address space to use for declaring workgroup memory.
unsigned workgroupAddrSpace;

/// The attribute name to use instead of `gpu.kernel`.
/// The attribute name to use instead of `gpu.kernel`. Null if no attribute
/// should be used.
StringAttr kernelAttributeName;

/// The attribute name to to set block size
std::optional<StringAttr> kernelBlockSizeAttributeName;
/// The attribute name to to set block size. Null if no attribute should be
/// used.
StringAttr kernelBlockSizeAttributeName;

/// The calling convention to use for kernel functions
LLVM::CConv kernelCallingConvention;
/// The calling convention to use for non-kernel functions
LLVM::CConv nonKernelCallingConvention;

/// Whether to encode workgroup attributions as additional arguments instead
/// of a global variable.
bool encodeWorkgroupAttributionsAsArguments;
};

/// The lowering of gpu.printf to a call to HIP hostcalls
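For context, a client conversion could now instantiate the pattern through the options struct roughly as below; `patterns`, `typeConverter`, and the specific address spaces and calling conventions are illustrative assumptions, not values taken from this diff:

// Hedged sketch: register GPUFuncOpLowering with explicit options.
patterns.add<GPUFuncOpLowering>(
    typeConverter,
    GPUFuncOpLoweringOptions{
        /*allocaAddrSpace=*/0,
        /*workgroupAddrSpace=*/3,
        /*kernelAttributeName=*/StringAttr{},
        /*kernelBlockSizeAttributeName=*/StringAttr{},
        /*kernelCallingConvention=*/LLVM::CConv::SPIR_KERNEL,
        /*nonKernelCallingConvention=*/LLVM::CConv::SPIR_FUNC,
        /*encodeWorkgroupAttributionsAsArguments=*/true});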
2 changes: 2 additions & 0 deletions mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt
@@ -6,7 +6,9 @@ add_mlir_conversion_library(MLIRGPUToLLVMSPV

LINK_LIBS PUBLIC
MLIRGPUDialect
MLIRGPUToGPURuntimeTransforms
MLIRLLVMCommonConversion
MLIRLLVMDialect
MLIRSPIRVAttrToLLVMConversion
MLIRSPIRVDialect
)
Loading