Skip to content

Commit

Permalink
[AMDGPU] Add amdgpu-promote-pointer-kernargs pass
Browse files Browse the repository at this point in the history
- Enable it before infer-address-space pass.

Change-Id: I7a967f873de9318cf18a1e168026e1c1c2407d21
  • Loading branch information
mhbliao committed Jul 17, 2019
1 parent 154c6bc commit 7313a99
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 0 deletions.
3 changes: 3 additions & 0 deletions lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,9 @@ extern char &GCNRegBankReassignID;
void initializeGCNNSAReassignPass(PassRegistry &);
extern char &GCNNSAReassignID;

/// Creates a new instance of the pass that promotes generic pointer kernel
/// arguments to global ones.
FunctionPass *createAMDGPUPromotePointerKernArgsToGlobalPass();
/// Initializer for the pass above; called with the target's PassRegistry
/// during AMDGPU target initialization.
void initializeAMDGPUPromotePointerKernArgsToGlobalPass(PassRegistry &);

namespace AMDGPU {
enum TargetIndex {
TI_CONSTDATA_START,
Expand Down
72 changes: 72 additions & 0 deletions lib/Target/AMDGPU/AMDGPUPromotePointerKernArgsToGlobal.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
//===-- AMDGPUPromotePointerKernArgsToGlobal.cpp - Promote pointer args ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Generic pointer kernel arguments need promoting to global ones.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-promote-pointer-kernargs"

namespace {

class AMDGPUPromotePointerKernArgsToGlobal : public FunctionPass {
public:
static char ID;

AMDGPUPromotePointerKernArgsToGlobal() : FunctionPass(ID) {}

bool runOnFunction(Function &F) override;
};

} // End anonymous namespace

// Storage for the pass ID; only its address matters, not its value.
char AMDGPUPromotePointerKernArgsToGlobal::ID = 0;

// Register the pass under DEBUG_TYPE as its command-line name. The two
// trailing flags mark it as neither CFG-only nor an analysis pass.
INITIALIZE_PASS(AMDGPUPromotePointerKernArgsToGlobal, DEBUG_TYPE,
                "AMDGPU Promote Pointer Kernel Arguments to Global", false,
                false)

/// Promote flat (generic) pointer arguments of a kernel to global pointers.
///
/// For every used argument whose type is a pointer in the flat address space,
/// a pair of casts is inserted at the top of the entry block:
///
///   %arg.global = addrspacecast %arg to addrspace(GLOBAL)
///   %arg.flat   = addrspacecast %arg.global to the original flat type
///
/// and all previous uses of the argument are redirected to the second cast.
///
/// Arguments that have no uses, or whose only users are already casts to the
/// global address space, are left alone. This keeps the pass from emitting
/// dead casts and makes it safe to run more than once on the same function.
///
/// \param F the function to process; anything that is not an AMDGPU kernel is
///          skipped.
/// \returns true if any cast was inserted.
bool AMDGPUPromotePointerKernArgsToGlobal::runOnFunction(Function &F) {
  // Skip non-entry function.
  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
    return false;

  BasicBlock &Entry = F.getEntryBlock();
  IRBuilder<> IRB(&Entry, Entry.begin());

  bool Changed = false;
  for (Argument &Arg : F.args()) {
    auto *PtrTy = dyn_cast<PointerType>(Arg.getType());
    if (!PtrTy || PtrTy->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
      continue;

    // Nothing to redirect; inserting casts would only create dead code.
    if (Arg.use_empty())
      continue;

    // If every user already casts the argument to the global address space
    // (e.g. this pass ran before), another cast pair would add nothing.
    const bool AlreadyPromoted = all_of(Arg.users(), [](const User *U) {
      const auto *ASC = dyn_cast<AddrSpaceCastInst>(U);
      return ASC && ASC->getDestAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
    });
    if (AlreadyPromoted)
      continue;

    Value *GlobalPtr = IRB.CreateAddrSpaceCast(
        &Arg,
        PointerType::get(PtrTy->getPointerElementType(),
                         AMDGPUAS::GLOBAL_ADDRESS),
        Arg.getName());
    Value *NewFlatPtr =
        IRB.CreateAddrSpaceCast(GlobalPtr, PtrTy, Arg.getName());

    // This also rewrites the operand of GlobalPtr, because that cast is a
    // user of Arg at this point ...
    Arg.replaceAllUsesWith(NewFlatPtr);
    // ... so fix the global pointer itself to read from the argument again.
    cast<Instruction>(GlobalPtr)->setOperand(0, &Arg);
    Changed = true;
  }

  return Changed;
}

/// Factory for the pass: returns a newly allocated
/// AMDGPUPromotePointerKernArgsToGlobal instance as a FunctionPass pointer.
FunctionPass *llvm::createAMDGPUPromotePointerKernArgsToGlobalPass() {
  FunctionPass *PromotePass = new AMDGPUPromotePointerKernArgsToGlobal();
  return PromotePass;
}
4 changes: 4 additions & 0 deletions lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUPromotePointerKernArgsToGlobalPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPUPropagateAttributesEarlyPass(*PR);
initializeAMDGPUPropagateAttributesLatePass(*PR);
Expand Down Expand Up @@ -447,6 +448,9 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.addExtension(
PassManagerBuilder::EP_CGSCCOptimizerLate,
[](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
// Promote generic pointer kernel arguments to global ones.
PM.add(llvm::createAMDGPUPromotePointerKernArgsToGlobalPass());

// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
PM.add(createInferAddressSpacesPass());
Expand Down
1 change: 1 addition & 0 deletions lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMCInstLower.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
AMDGPUPromoteAlloca.cpp
AMDGPUPromotePointerKernArgsToGlobal.cpp
AMDGPUPropagateAttributes.cpp
AMDGPURegisterBankInfo.cpp
AMDGPURegisterInfo.cpp
Expand Down
13 changes: 13 additions & 0 deletions test/CodeGen/AMDGPU/promote-pointer-kernargs.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
; RUN: opt -O1 -S -o - -mtriple=amdgcn %s | FileCheck %s

; CHECK-LABEL: promote_pointer_kernargs
; CHECK-NEXT: addrspacecast i32* %{{.*}} to i32 addrspace(1)*
; CHECK-NEXT: addrspacecast i32* %{{.*}} to i32 addrspace(1)*
; CHECK-NEXT: load i32, i32 addrspace(1)*
; CHECK-NEXT: store i32 %{{.*}}, i32 addrspace(1)*
; CHECK-NEXT: ret void
; Kernel taking two i32 pointers in the default (unqualified) address space:
; it loads a value through %in and stores it through %out. The directives
; above expect one cast to addrspace(1) per argument and expect the load and
; the store to operate on addrspace(1) pointers in the optimized output.
define amdgpu_kernel void @promote_pointer_kernargs(i32* %out, i32* %in) {
%v = load i32, i32* %in
store i32 %v, i32* %out
ret void
}

0 comments on commit 7313a99

Please sign in to comment.