Skip to content

Commit

Permalink
[FunctionAttrs] Add the "initializes" attribute inference (llvm#97373)
Browse files Browse the repository at this point in the history
Add the "initializes" attribute inference.

This change is expected to have ~0.09% compile time regression, which
seems acceptable for interprocedural DSE.

https://llvm-compile-time-tracker.com/compare.php?from=9f10252c4ad7cffbbcf692fa9c953698f82ac4f5&to=56345c1cee4375eb5c28b8e7abf4803d20216b3b&stat=instructions%3Au
  • Loading branch information
haopliu authored Nov 19, 2024
1 parent ff9509e commit 661c593
Show file tree
Hide file tree
Showing 15 changed files with 972 additions and 77 deletions.
329 changes: 326 additions & 3 deletions llvm/lib/Transforms/IPO/FunctionAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
Expand All @@ -36,6 +37,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
Expand Down Expand Up @@ -581,6 +583,200 @@ struct ArgumentUsesTracker : public CaptureTracker {
const SCCNodeSet &SCCNodes;
};

/// A struct of argument use: a Use and the offset it accesses. This struct
/// is to track uses inside function via GEP. If GEP has a non-constant index,
/// the Offset field is nullopt.
///
/// Instances are produced by the worklist in collectArgumentUsesPerBlock():
/// direct uses of the argument start at offset 0, and every GEP that is looked
/// through adds its constant offset to the offset of the use it came from.
struct ArgumentUse {
// The use being described: either a direct use of the argument or a use of a
// GEP (possibly a chain of GEPs) derived from the argument.
Use *U;
// Constant offset of this use relative to the argument, accumulated over the
// GEPs on the path. Becomes nullopt as soon as one GEP on the path has no
// constant offset, and stays nullopt for everything derived from it.
std::optional<int64_t> Offset;
};

/// A struct of argument access info. "Unknown" accesses are the cases like
/// unrecognized instructions, instructions that have more than one use of
/// the argument, or volatile memory accesses. "WriteWithSideEffect" are call
/// instructions that write the argument through a parameter that is not both
/// write-only (writeonly/readnone) and nocapture, i.e. the callee may also
/// read or capture the pointer in addition to writing it.
struct ArgumentAccessInfo {
// Kind of access performed by the instruction. "Write" and "Read" are plain
// accesses to the ranges below; see the struct comment for the other kinds.
enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown };
AccessType ArgAccessType;
// Ranges of the argument touched by the access, expressed as offsets from the
// argument pointer. May be empty, e.g. for a write whose size or offset is not
// a known constant, and is always empty for "Unknown".
ConstantRangeList AccessRanges;
};

/// A struct to wrap the argument use info per block.
struct UsesPerBlockInfo {
// Access info for every instruction of the block that uses the argument
// (directly or through GEPs). An instruction that uses the argument more than
// once is recorded as an "Unknown" access.
SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts;
// True if the block contains a Write or WriteWithSideEffect access that has a
// non-empty access range.
bool HasWrites = false;
// True if the block contains at least one "Unknown" access.
bool HasUnknownAccess = false;
};

/// A struct to summarize the argument use info in a function.
struct ArgumentUsesSummary {
// True if some instruction in the function writes a non-empty range of the
// argument (Write or WriteWithSideEffect).
bool HasAnyWrite = false;
// True if such a write exists in a block other than the entry block. When it
// is false, only the entry block has to be scanned to infer "initializes".
bool HasWriteOutsideEntryBB = false;
// Per-block use info, keyed by the block that contains the using instruction.
// Blocks without any use of the argument have no entry.
SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock;
};

/// Classify how instruction \p I accesses the argument through \p ArgUse.
///
/// Recognized accesses:
///  - a simple store whose pointer operand is the use: Write;
///  - a simple load: Read;
///  - a non-volatile memset: Write;
///  - a non-volatile memory transfer: Write when the use is the destination,
///    Read when it is the source;
///  - a call that passes the use as a non-byval argument whose parameter has
///    the "initializes" attribute, when the offset of the use is known: Write
///    if the parameter is write-only and nocapture, WriteWithSideEffect
///    otherwise.
/// Everything else is reported as Unknown with an empty range list.
///
/// \param I      The instruction that owns the use.
/// \param ArgUse The use and its constant offset from the argument (nullopt
///               when the offset is not a known constant).
/// \param DL     Data layout used to compute store sizes of accessed types.
/// \returns The access kind and the accessed ranges. Writes whose range cannot
///          be computed are still reported as writes, with an empty range
///          list; reads whose range cannot be computed become Unknown.
ArgumentAccessInfo getArgmentAccessInfo(const Instruction *I,
                                        const ArgumentUse &ArgUse,
                                        const DataLayout &DL) {
  // Build the half-open range [Start, Start + Size). Returns nullopt when the
  // range would be invalid: a zero or negative size would produce a range with
  // Lower >= Upper, and an end that overflows int64_t would wrap around.
  // Neither can be represented in a ConstantRangeList.
  auto MakeAccessRange = [](int64_t Start,
                            int64_t Size) -> std::optional<ConstantRange> {
    if (Size <= 0)
      return std::nullopt;
    APInt Low(64, Start, /*isSigned=*/true);
    bool Overflow = false;
    APInt High = Low.sadd_ov(APInt(64, Size, /*isSigned=*/true), Overflow);
    if (Overflow)
      return std::nullopt;
    return ConstantRange(Low, High);
  };
  // Range covered by an access of type "Ty" at "Offset"; nullopt for scalable
  // types and unknown offsets.
  auto GetTypeAccessRange =
      [&](Type *Ty,
          std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto TypeSize = DL.getTypeStoreSize(Ty);
    if (TypeSize.isScalable() || !Offset)
      return std::nullopt;
    // A size that does not fit in int64_t turns negative here and is rejected
    // by MakeAccessRange.
    return MakeAccessRange(*Offset,
                           static_cast<int64_t>(TypeSize.getFixedValue()));
  };
  // Range covered by "Length" bytes at "Offset"; nullopt when the length is
  // not a constant or the offset is unknown.
  auto GetConstantIntRange =
      [&](Value *Length,
          std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
    if (!ConstantLength || !Offset)
      return std::nullopt;
    return MakeAccessRange(*Offset, ConstantLength->getSExtValue());
  };

  if (auto *SI = dyn_cast<StoreInst>(I)) {
    // Only a store *to* the argument is a write; storing the pointer itself
    // somewhere else falls through to Unknown.
    if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) {
      // Get the fixed type size of "SI". Since the access range of a write
      // will be unioned, if "SI" doesn't have a fixed type size, we just set
      // the access range to empty.
      ConstantRangeList AccessRanges;
      if (auto TypeAccessRange =
              GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset))
        AccessRanges.insert(*TypeAccessRange);
      return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)};
    }
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (LI->isSimple()) {
      assert(&LI->getOperandUse(0) == ArgUse.U);
      // Get the fixed type size of "LI". Different from Write, if "LI"
      // doesn't have a fixed type size, we conservatively set as a clobber
      // with an empty access range.
      if (auto TypeAccessRange =
              GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset))
        return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}};
    }
  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
    if (!MemSet->isVolatile()) {
      ConstantRangeList AccessRanges;
      if (auto AccessRange =
              GetConstantIntRange(MemSet->getLength(), ArgUse.Offset))
        AccessRanges.insert(*AccessRange);
      return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
    }
  } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
    if (!MTI->isVolatile()) {
      if (&MTI->getOperandUse(0) == ArgUse.U) {
        // The argument is the destination of the transfer.
        ConstantRangeList AccessRanges;
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          AccessRanges.insert(*AccessRange);
        return {ArgumentAccessInfo::AccessType::Write, AccessRanges};
      } else if (&MTI->getOperandUse(1) == ArgUse.U) {
        // The argument is the source of the transfer. Without a usable range
        // the read is treated as Unknown below.
        if (auto AccessRange =
                GetConstantIntRange(MTI->getLength(), ArgUse.Offset))
          return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}};
      }
    }
  } else if (auto *CB = dyn_cast<CallBase>(I)) {
    // A byval argument hands the callee a copy of the pointee, so whatever the
    // callee initializes is not the memory our argument points to. Treat such
    // calls as Unknown.
    if (CB->isArgOperand(ArgUse.U) &&
        !CB->isByValArgument(CB->getArgOperandNo(ArgUse.U))) {
      unsigned ArgNo = CB->getArgOperandNo(ArgUse.U);
      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
      // Argument is a Write when parameter is writeonly/readnone
      // and nocapture. Otherwise, it's a WriteWithSideEffect.
      auto Access = CB->onlyWritesMemory(ArgNo) &&
                            CB->paramHasAttr(ArgNo, Attribute::NoCapture)
                        ? ArgumentAccessInfo::AccessType::Write
                        : ArgumentAccessInfo::AccessType::WriteWithSideEffect;
      ConstantRangeList AccessRanges;
      if (IsInitialize && ArgUse.Offset) {
        // Translate the callee's initialized ranges, which are relative to the
        // pointer it receives, into ranges relative to our argument.
        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
        ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
        for (const ConstantRange &CR : CBCRL)
          AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset,
                                            CR.getUpper() + *ArgUse.Offset));
        return {Access, AccessRanges};
      }
    }
  }
  // Other unrecognized instructions are considered as unknown.
  return {ArgumentAccessInfo::AccessType::Unknown, {}};
}

// Walk every use of argument "A" in "F", looking through GEPs, and record per
// basic block how each using instruction accesses the argument. The returned
// summary also tells whether any write with a known range exists and whether
// such a write lives outside the entry block.
ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) {
  using AccessType = ArgumentAccessInfo::AccessType;

  ArgumentUsesSummary Result;
  const DataLayout &DL = F.getParent()->getDataLayout();
  const unsigned PointerSize =
      DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
  BasicBlock &EntryBB = F.getEntryBlock();

  // Store "Info" for instruction "I" in the entry of I's block. Returns true
  // if "Info" is a write with a non-empty range, i.e. the block gained a write
  // access by this update.
  auto RecordAccess = [&Result](Instruction *I,
                                ArgumentAccessInfo Info) -> bool {
    UsesPerBlockInfo &BlockInfo = Result.UsesPerBlock[I->getParent()];
    auto [Slot, IsFirstVisit] = BlockInfo.Insts.try_emplace(I);
    ArgumentAccessInfo &Recorded = Slot->second;

    // An instruction reached a second time uses the argument more than once;
    // such instructions are treated as clobbers.
    if (!IsFirstVisit) {
      Recorded = {AccessType::Unknown, {}};
      BlockInfo.HasUnknownAccess = true;
      return false;
    }

    Recorded = std::move(Info);
    if (Recorded.ArgAccessType == AccessType::Unknown)
      BlockInfo.HasUnknownAccess = true;

    const bool IsWriteKind =
        Recorded.ArgAccessType == AccessType::Write ||
        Recorded.ArgAccessType == AccessType::WriteWithSideEffect;
    const bool WritesKnownRange =
        IsWriteKind && !Recorded.AccessRanges.empty();
    if (WritesKnownRange)
      BlockInfo.HasWrites = true;
    return WritesKnownRange;
  };

  // Seed the worklist with the direct uses of the argument, all at offset 0.
  SmallVector<ArgumentUse, 4> Pending;
  for (Use &DirectUse : A.uses())
    Pending.push_back({&DirectUse, 0});

  // Phis are not looked through, so the use graph walked here has no cycles
  // and no visited set is required.
  while (!Pending.empty()) {
    const ArgumentUse Current = Pending.pop_back_val();
    User *Usr = Current.U->getUser();

    // Look through GEPs: queue their uses with the accumulated offset, or with
    // nullopt once the offset stops being a known constant.
    if (auto *GEP = dyn_cast<GEPOperator>(Usr)) {
      std::optional<int64_t> OffsetViaGEP;
      if (Current.Offset) {
        APInt GEPOffset(PointerSize, 0);
        if (GEP->accumulateConstantOffset(DL, GEPOffset))
          OffsetViaGEP = *Current.Offset + GEPOffset.getSExtValue();
      }
      for (Use &GEPUse : GEP->uses())
        Pending.push_back({&GEPUse, OffsetViaGEP});
      continue;
    }

    auto *I = cast<Instruction>(Usr);
    if (!RecordAccess(I, getArgmentAccessInfo(I, Current, DL)))
      continue;

    Result.HasAnyWrite = true;
    if (I->getParent() != &EntryBB)
      Result.HasWriteOutsideEntryBB = true;
  }
  return Result;
}

} // end anonymous namespace

namespace llvm {
Expand Down Expand Up @@ -867,9 +1063,129 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
return true;
}

/// Try to infer the "initializes" attribute for argument \p A of \p F.
///
/// The inferred ranges are those that are written on every path from the
/// function entry before being read or clobbered, as computed from the
/// per-block use summary of the argument. If the argument already carries an
/// "initializes" attribute, the new ranges are merged into it.
///
/// \returns true if the attribute on \p A was added or extended, false if
///          nothing could be inferred or the attribute would not change.
static bool inferInitializes(Argument &A, Function &F) {
  auto ArgumentUses = collectArgumentUsesPerBlock(A, F);
  // No write anywhere in the function, bail.
  if (!ArgumentUses.HasAnyWrite)
    return false;

  auto &UsesPerBlock = ArgumentUses.UsesPerBlock;
  BasicBlock &EntryBB = F.getEntryBlock();
  // A map to store the argument ranges initialized by a BasicBlock (including
  // its successors).
  DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
  // Visit the successors of "BB" block and the instructions in BB (post-order)
  // to get the argument ranges initialized by "BB" (including its successors).
  // The result will be cached in "Initialized".
  auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
    auto UPB = UsesPerBlock.find(BB);
    ConstantRangeList CRL;

    // Start with intersection of successors.
    // If this block has any clobbering use, we're going to clear out the
    // ranges at some point in this block anyway, so don't bother looking at
    // successors.
    if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) {
      bool HasAddedSuccessor = false;
      for (auto *Succ : successors(BB)) {
        if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
          if (HasAddedSuccessor) {
            CRL = CRL.intersectWith(SuccI->second);
          } else {
            CRL = SuccI->second;
            HasAddedSuccessor = true;
          }
        } else {
          // A successor without a cached result initializes nothing (or has
          // not been visited yet), so nothing is guaranteed after this block.
          CRL = ConstantRangeList();
          break;
        }
      }
    }

    if (UPB != UsesPerBlock.end()) {
      // Sort uses in this block by instruction order.
      SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
      append_range(Insts, UPB->second.Insts);
      sort(Insts, [](const std::pair<Instruction *, ArgumentAccessInfo> &LHS,
                     const std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
        return LHS.first->comesBefore(RHS.first);
      });

      // From the end of the block to the beginning of the block, set
      // initializes ranges.
      for (auto &[_, Info] : reverse(Insts)) {
        // Clobbers discard everything that is initialized later in the block
        // or in its successors.
        if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown ||
            Info.ArgAccessType ==
                ArgumentAccessInfo::AccessType::WriteWithSideEffect)
          CRL = ConstantRangeList();
        if (!Info.AccessRanges.empty()) {
          if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write ||
              Info.ArgAccessType ==
                  ArgumentAccessInfo::AccessType::WriteWithSideEffect) {
            CRL = CRL.unionWith(Info.AccessRanges);
          } else {
            // A read before the later writes means the read range is not
            // initialized by this function.
            assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
            for (const auto &ReadRange : Info.AccessRanges)
              CRL.subtract(ReadRange);
          }
        }
      }
    }
    return CRL;
  };

  ConstantRangeList EntryCRL;
  // If all write instructions are in the EntryBB, or if the EntryBB has
  // a clobbering use, we only need to look at EntryBB.
  bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB;
  if (!OnlyScanEntryBlock)
    if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
        EntryUPB != UsesPerBlock.end())
      OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess;
  if (OnlyScanEntryBlock) {
    EntryCRL = VisitBlock(&EntryBB);
    if (EntryCRL.empty())
      return false;
  } else {
    // Now we have to go through CFG to get the initialized argument ranges
    // across blocks. With dominance and post-dominance, the initialized ranges
    // by a block include both accesses inside this block and accesses in its
    // (transitive) successors. So visit successors before predecessors with a
    // post-order walk of the blocks and memorize the results in "Initialized".
    for (const BasicBlock *BB : post_order(&F)) {
      ConstantRangeList CRL = VisitBlock(BB);
      if (!CRL.empty())
        Initialized[BB] = std::move(CRL);
    }

    auto EntryCRLI = Initialized.find(&EntryBB);
    if (EntryCRLI == Initialized.end())
      return false;

    EntryCRL = EntryCRLI->second;
  }

  assert(!EntryCRL.empty() &&
         "should have bailed already if EntryCRL is empty");

  if (A.hasAttribute(Attribute::Initializes)) {
    ConstantRangeList PreviousCRL =
        A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
    // Merge with what is already known and only report a change if the merge
    // actually extends the existing attribute. Comparing the merged list
    // (rather than the freshly inferred one) also covers the case where the
    // inferred ranges are a strict subset of the existing attribute.
    EntryCRL = EntryCRL.unionWith(PreviousCRL);
    if (EntryCRL == PreviousCRL)
      return false;
  }

  A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
                           EntryCRL.rangesRef()));

  return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
SmallSet<Function *, 8> &Changed,
bool SkipInitializes) {
ArgumentGraph AG;

// Check each function in turn, determining which pointer arguments are not
Expand Down Expand Up @@ -937,6 +1253,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
if (addAccessAttr(&A, R))
Changed.insert(F);
}
if (!SkipInitializes && !A.onlyReadsMemory()) {
if (inferInitializes(A, *F))
Changed.insert(F);
}
}
}

Expand Down Expand Up @@ -1910,13 +2230,16 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,

SmallSet<Function *, 8> Changed;
if (ArgAttrsOnly) {
addArgumentAttrs(Nodes.SCCNodes, Changed);
// ArgAttrsOnly means to only infer attributes that may aid optimizations
// on the *current* function. "initializes" attribute is to aid
// optimizations (like DSE) on the callers, so skip "initializes" here.
addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
return Changed;
}

addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
addArgumentAttrs(Nodes.SCCNodes, Changed);
addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
addColdAttrs(Nodes.SCCNodes, Changed);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define void @test0_yes(ptr %p) nounwind {
ret void
}

; CHECK: define void @test0_no(ptr nocapture writeonly %p) #1 {
; CHECK: define void @test0_no(ptr nocapture writeonly initializes((0, 4)) %p) #1 {
define void @test0_no(ptr %p) nounwind {
store i32 0, ptr %p, !tbaa !2
ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; Should have call to sincos declarations, not calls to the asm pseudo-libcalls
define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 {
; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865(
; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-SAME: ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]]
Expand Down
Loading

0 comments on commit 661c593

Please sign in to comment.