-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FunctionAttrs] Add the "initializes" attribute inference #97373
Conversation
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-coroutines Author: Haopeng Liu (haopliu) ChangesAdd the "initializes" attribute inference. This change is expected to have ~0.09% compile time regression, which seems acceptable for interprocedural DSE. Patch is 89.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97373.diff 15 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 7b419d0f098b5..507dbf4ef26f0 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -36,6 +37,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
@@ -580,6 +582,205 @@ struct ArgumentUsesTracker : public CaptureTracker {
const SCCNodeSet &SCCNodes;
};
+struct ArgumentUse {
+ Use *U;
+ std::optional<int64_t> Offset;
+};
+
+// A struct of argument access info. "Unknown" accesses are the cases like
+// unrecognized instructions, instructions that have more than one use of
+// the argument, or volatile memory accesses. "Unknown" implies "IsClobber"
+// and an empty access range.
+// Write or Read accesses can be clobbers as well for example, a Load with
+// scalable type.
+struct ArgumentAccessInfo {
+ enum AccessType { Write, Read, Unknown };
+ AccessType ArgAccessType;
+ ConstantRangeList AccessRanges;
+ bool IsClobber = false;
+};
+
+struct UsesPerBlockInfo {
+ DenseMap<Instruction *, ArgumentAccessInfo> Insts;
+ bool HasWrites;
+ bool HasClobber;
+};
+
+ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I,
+ const ArgumentUse &IU,
+ const DataLayout &DL) {
+ auto GetTypeAccessRange =
+ [&DL](Type *Ty,
+ std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
+ auto TypeSize = DL.getTypeStoreSize(Ty);
+ if (!TypeSize.isScalable() && Offset.has_value()) {
+ int64_t Size = TypeSize.getFixedValue();
+ return ConstantRange(APInt(64, Offset.value(), true),
+ APInt(64, Offset.value() + Size, true));
+ }
+ return std::nullopt;
+ };
+ auto GetConstantIntRange =
+ [](Value *Length,
+ std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
+ auto *ConstantLength = dyn_cast<ConstantInt>(Length);
+ if (ConstantLength && Offset.has_value()) {
+ return ConstantRange(
+ APInt(64, Offset.value(), true),
+ APInt(64, Offset.value() + ConstantLength->getSExtValue(), true));
+ }
+ return std::nullopt;
+ };
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ if (&SI->getOperandUse(1) == IU.U) {
+ // Get the fixed type size of "SI". Since the access range of a write
+ // will be unioned, if "SI" doesn't have a fixed type size, we just set
+ // the access range to empty.
+ ConstantRangeList AccessRanges;
+ auto TypeAccessRange = GetTypeAccessRange(SI->getAccessType(), IU.Offset);
+ if (TypeAccessRange.has_value())
+ AccessRanges.insert(TypeAccessRange.value());
+ return {ArgumentAccessInfo::AccessType::Write, AccessRanges,
+ /*IsClobber=*/false};
+ }
+ } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+ if (&LI->getOperandUse(0) == IU.U) {
+ // Get the fixed type size of "LI". Different from Write, if "LI"
+ // doesn't have a fixed type size, we conservatively set as a clobber
+ // with an empty access range.
+ auto TypeAccessRange = GetTypeAccessRange(LI->getAccessType(), IU.Offset);
+ if (TypeAccessRange.has_value())
+ return {ArgumentAccessInfo::AccessType::Read,
+ {TypeAccessRange.value()},
+ /*IsClobber=*/false};
+ else
+ return {ArgumentAccessInfo::AccessType::Read, {}, /*IsClobber=*/true};
+ }
+ } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
+ if (!MemSet->isVolatile()) {
+ ConstantRangeList AccessRanges;
+ auto AccessRange = GetConstantIntRange(MemSet->getLength(), IU.Offset);
+ if (AccessRange.has_value())
+ AccessRanges.insert(AccessRange.value());
+ return {ArgumentAccessInfo::AccessType::Write, AccessRanges,
+ /*IsClobber=*/false};
+ }
+ } else if (auto *MemCpy = dyn_cast<MemCpyInst>(I)) {
+ if (!MemCpy->isVolatile()) {
+ if (&MemCpy->getOperandUse(0) == IU.U) {
+ ConstantRangeList AccessRanges;
+ auto AccessRange = GetConstantIntRange(MemCpy->getLength(), IU.Offset);
+ if (AccessRange.has_value())
+ AccessRanges.insert(AccessRange.value());
+ return {ArgumentAccessInfo::AccessType::Write, AccessRanges,
+ /*IsClobber=*/false};
+ } else if (&MemCpy->getOperandUse(1) == IU.U) {
+ auto AccessRange = GetConstantIntRange(MemCpy->getLength(), IU.Offset);
+ if (AccessRange.has_value())
+ return {ArgumentAccessInfo::AccessType::Read,
+ {AccessRange.value()},
+ /*IsClobber=*/false};
+ else
+ return {ArgumentAccessInfo::AccessType::Read, {}, /*IsClobber=*/true};
+ }
+ }
+ } else if (auto *CB = dyn_cast<CallBase>(I)) {
+ if (CB->isArgOperand(IU.U)) {
+ unsigned ArgNo = CB->getArgOperandNo(IU.U);
+ bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
+ // Argument is only not clobbered when parameter is writeonly/readnone
+ // and nocapture.
+ bool IsClobber = !(CB->onlyWritesMemory(ArgNo) &&
+ CB->paramHasAttr(ArgNo, Attribute::NoCapture));
+ ConstantRangeList AccessRanges;
+ if (IsInitialize && IU.Offset.has_value()) {
+ Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
+ if (!Attr.isValid()) {
+ Attr = CB->getCalledFunction()->getParamAttribute(
+ ArgNo, Attribute::Initializes);
+ }
+ ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
+ for (ConstantRange &CR : CBCRL) {
+ AccessRanges.insert(ConstantRange(CR.getLower() + IU.Offset.value(),
+ CR.getUpper() + IU.Offset.value()));
+ }
+ return {ArgumentAccessInfo::AccessType::Write, AccessRanges, IsClobber};
+ }
+ }
+ }
+ // Unrecognized instructions are considered clobbers.
+ return {ArgumentAccessInfo::AccessType::Unknown, {}, /*IsClobber=*/true};
+}
+
+std::pair<bool, bool> CollectArgumentUsesPerBlock(
+ Argument &A, Function &F,
+ DenseMap<const BasicBlock *, UsesPerBlockInfo> &UsesPerBlock) {
+ auto &DL = F.getParent()->getDataLayout();
+ auto PointerSize =
+ DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
+
+ bool HasAnyWrite = false;
+ bool HasWriteOutsideEntryBB = false;
+
+ BasicBlock &EntryBB = F.getEntryBlock();
+ SmallVector<ArgumentUse, 4> Worklist;
+ for (Use &U : A.uses())
+ Worklist.push_back({&U, 0});
+
+ auto UpdateUseInfo = [&UsesPerBlock](Instruction *I,
+ ArgumentAccessInfo Info) {
+ auto *BB = I->getParent();
+ auto &BBInfo = UsesPerBlock.getOrInsertDefault(BB);
+ bool AlreadyVisitedInst = BBInfo.Insts.contains(I);
+ auto &IInfo = BBInfo.Insts[I];
+
+ // Instructions that have more than one use of the argument are considered
+ // as clobbers.
+ if (AlreadyVisitedInst) {
+ IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}, true};
+ BBInfo.HasClobber = true;
+ return false;
+ }
+
+ IInfo = Info;
+ BBInfo.HasClobber |= IInfo.IsClobber;
+ BBInfo.HasWrites |=
+ (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write &&
+ !IInfo.AccessRanges.empty());
+ return !IInfo.AccessRanges.empty();
+ };
+
+ // No need for a visited set because we don't look through phis, so there are
+ // no cycles.
+ while (!Worklist.empty()) {
+ ArgumentUse IU = Worklist.pop_back_val();
+ User *U = IU.U->getUser();
+ // Add GEP uses to worklist.
+ // If the GEP is not a constant GEP, set IsInitialize to false.
+ if (auto *GEP = dyn_cast<GEPOperator>(U)) {
+ APInt Offset(PointerSize, 0, /*isSigned=*/true);
+ bool IsConstGEP = GEP->accumulateConstantOffset(DL, Offset);
+ std::optional<int64_t> NewOffset = std::nullopt;
+ if (IsConstGEP && IU.Offset.has_value()) {
+ NewOffset = *IU.Offset + Offset.getSExtValue();
+ }
+ for (Use &U : GEP->uses())
+ Worklist.push_back({&U, NewOffset});
+ continue;
+ }
+
+ auto *I = cast<Instruction>(U);
+ bool HasWrite = UpdateUseInfo(I, GetArgmentAccessInfo(I, IU, DL));
+
+ HasAnyWrite |= HasWrite;
+
+ if (HasWrite && I->getParent() != &EntryBB) {
+ HasWriteOutsideEntryBB = true;
+ }
+ }
+ return {HasAnyWrite, HasWriteOutsideEntryBB};
+}
+
} // end anonymous namespace
namespace llvm {
@@ -866,9 +1067,132 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
return true;
}
+static bool inferInitializes(Argument &A, Function &F) {
+ DenseMap<const BasicBlock *, UsesPerBlockInfo> UsesPerBlock;
+ auto [HasAnyWrite, HasWriteOutsideEntryBB] =
+ CollectArgumentUsesPerBlock(A, F, UsesPerBlock);
+ // No write anywhere in the function, bail.
+ if (!HasAnyWrite)
+ return false;
+
+ BasicBlock &EntryBB = F.getEntryBlock();
+ DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
+ auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
+ auto UPB = UsesPerBlock.find(BB);
+
+ // If this block has uses and none are writes, the argument is not
+ // initialized in this block.
+ if (UPB != UsesPerBlock.end() && !UPB->second.HasWrites)
+ return ConstantRangeList();
+
+ ConstantRangeList CRL;
+
+ // Start with intersection of successors.
+ // If this block has any clobbering use, we're going to clear out the
+ // ranges at some point in this block anyway, so don't bother looking at
+ // successors.
+ if (UPB == UsesPerBlock.end() || !UPB->second.HasClobber) {
+ bool HasAddedSuccessor = false;
+ for (auto *Succ : successors(BB)) {
+ if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
+ if (HasAddedSuccessor) {
+ CRL = CRL.intersectWith(SuccI->second);
+ } else {
+ CRL = SuccI->second;
+ HasAddedSuccessor = true;
+ }
+ } else {
+ CRL = ConstantRangeList();
+ break;
+ }
+ }
+ }
+
+ if (UPB != UsesPerBlock.end()) {
+ // Sort uses in this block by instruction order.
+ SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
+ append_range(Insts, UPB->second.Insts);
+ sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS,
+ std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
+ return LHS.first->comesBefore(RHS.first);
+ });
+
+ // From the end of the block to the beginning of the block, set
+ // initializes ranges.
+ for (auto [_, Info] : reverse(Insts)) {
+ if (Info.IsClobber) {
+ CRL = ConstantRangeList();
+ }
+ if (!Info.AccessRanges.empty()) {
+ if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write) {
+ CRL = CRL.unionWith(Info.AccessRanges);
+ } else {
+ assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
+ for (const auto &ReadRange : Info.AccessRanges) {
+ CRL.subtract(ReadRange);
+ }
+ }
+ }
+ }
+ }
+ return CRL;
+ };
+
+ ConstantRangeList EntryCRL;
+ // If all write instructions are in the EntryBB, or if the EntryBB has
+ // a clobbering use, we only need to look at EntryBB.
+ bool OnlyScanEntryBlock = !HasWriteOutsideEntryBB;
+ if (!OnlyScanEntryBlock) {
+ if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
+ EntryUPB != UsesPerBlock.end()) {
+ OnlyScanEntryBlock = EntryUPB->second.HasClobber;
+ }
+ }
+ if (OnlyScanEntryBlock) {
+ EntryCRL = VisitBlock(&EntryBB);
+ if (EntryCRL.empty()) {
+ return false;
+ }
+ } else {
+ // Visit successors before predecessors with a post-order walk of the
+ // blocks.
+ for (const BasicBlock *BB : post_order(&F)) {
+ ConstantRangeList CRL = VisitBlock(BB);
+ if (!CRL.empty()) {
+ Initialized[BB] = CRL;
+ }
+ }
+
+ auto EntryCRLI = Initialized.find(&EntryBB);
+ if (EntryCRLI == Initialized.end()) {
+ return false;
+ }
+
+ EntryCRL = EntryCRLI->second;
+ }
+
+ assert(!EntryCRL.empty() &&
+ "should have bailed already if EntryCRL is empty");
+
+ if (A.hasAttribute(Attribute::Initializes)) {
+ ConstantRangeList PreviousCRL =
+ A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
+ if (PreviousCRL == EntryCRL) {
+ return false;
+ }
+ EntryCRL = EntryCRL.unionWith(PreviousCRL);
+ }
+
+ A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
+ EntryCRL.rangesRef()));
+
+ return true;
+}
+
/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
- SmallSet<Function *, 8> &Changed) {
+ SmallSet<Function *, 8> &Changed,
+ bool SkipInitializes) {
ArgumentGraph AG;
// Check each function in turn, determining which pointer arguments are not
@@ -936,6 +1260,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
if (addAccessAttr(&A, R))
Changed.insert(F);
}
+ if (!SkipInitializes && !A.onlyReadsMemory()) {
+ if (inferInitializes(A, *F))
+ Changed.insert(F);
+ }
}
}
@@ -1844,13 +2172,13 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
SmallSet<Function *, 8> Changed;
if (ArgAttrsOnly) {
- addArgumentAttrs(Nodes.SCCNodes, Changed);
+ addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
return Changed;
}
addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
- addArgumentAttrs(Nodes.SCCNodes, Changed);
+ addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
addWillReturn(Nodes.SCCNodes, Changed);
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index bea56a72bdeae..8615363a985d1 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -15,7 +15,7 @@ define void @test0_yes(ptr %p) nounwind {
ret void
}
-; CHECK: define void @test0_no(ptr nocapture writeonly %p) #1 {
+; CHECK: define void @test0_no(ptr nocapture writeonly initializes((0, 4)) %p) #1 {
define void @test0_no(ptr %p) nounwind {
store i32 0, ptr %p, !tbaa !2
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
index 6b835bb4eef66..317a069eed26e 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
@@ -10,7 +10,7 @@
; Should have call to sincos declarations, not calls to the asm pseudo-libcalls
define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 {
; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865(
-; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
index a35fbaadddf9e..619124affff81 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
@@ -49,7 +49,7 @@ declare float @_Z6sincosfPU3AS0f(float %x, ptr writeonly %ptr) #1
define void @sincos_f16_nocontract(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_f16_nocontract
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call half @_Z3sinDh(half [[X]])
; CHECK-NEXT: store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
@@ -68,7 +68,7 @@ entry:
define void @sincos_v2f16_nocontract(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_v2f16_nocontract
-; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]])
; CHECK-NEXT: store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
@@ -87,7 +87,7 @@ entry:
define void @sincos_f16(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_f16
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call contract half @_Z3sinDh(half [[X]])
; CHECK-NEXT: store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
@@ -105,7 +105,7 @@ entry:
define void @sincos_f16_order1(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_f16_order1
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL1:%.*]] = tail call contract half @_Z3cosDh(half [[X]])
; CHECK-NEXT: store half [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 2
@@ -123,7 +123,7 @@ entry:
define void @sincos_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_v2f16
-; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (<2 x half> [[X:%.*]], p...
[truncated]
|
return {ArgumentAccessInfo::AccessType::Write, AccessRanges, | ||
/*IsClobber=*/false}; | ||
} | ||
} else if (auto *MemCpy = dyn_cast<MemCpyInst>(I)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be MemTransferInst to cover memmove too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, good point! Changed to MemTransferInst and added unit test for memmove as well.
return {ArgumentAccessInfo::AccessType::Read, | ||
{AccessRange.value()}, | ||
/*IsClobber=*/false}; | ||
else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No else after return
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
@@ -580,6 +582,205 @@ struct ArgumentUsesTracker : public CaptureTracker { | |||
const SCCNodeSet &SCCNodes; | |||
}; | |||
|
|||
struct ArgumentUse { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
missing documentation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for reminding! Done.
// Write or Read accesses can be clobbers as well for example, a Load with | ||
// scalable type. | ||
struct ArgumentAccessInfo { | ||
enum AccessType { Write, Read, Unknown }; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
enum class
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
BBInfo.HasWrites |= | ||
(IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write && | ||
!IInfo.AccessRanges.empty()); | ||
return !IInfo.AccessRanges.empty(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does the return value mean "HasWrite" or is it more subtle than that? If it means "HasWrite", should it match the "BBInfo.HasWrites " above in that it also checks ArgAccessType == Write
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice catch! Updated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
even when lgtm, we should land this only after the DSE patch lands as to not unnecessarily increase compile time without any benefit
can you add [FunctionAttrs]
to the beginning of the commit title?
} | ||
|
||
auto EntryCRLI = Initialized.find(&EntryBB); | ||
if (EntryCRLI == Initialized.end()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you cleanup the instances of https://llvm.org/docs/CodingStandards.html#don-t-use-braces-on-simple-single-statement-bodies-of-if-else-loop-statements ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, thanks for reminding! Done.
[&DL](Type *Ty, | ||
std::optional<int64_t> Offset) -> std::optional<ConstantRange> { | ||
auto TypeSize = DL.getTypeStoreSize(Ty); | ||
if (!TypeSize.isScalable() && Offset.has_value()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in many if-statements, you write optional.has_value()
. optional
is sufficient and the preferred form.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Thanks!
if (!TypeSize.isScalable() && Offset.has_value()) { | ||
int64_t Size = TypeSize.getFixedValue(); | ||
return ConstantRange(APInt(64, Offset.value(), true), | ||
APInt(64, Offset.value() + Size, true)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To access the value of optionals, you often write optional.value()
. The preferred from is *optional
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
@@ -12,7 +12,8 @@ | |||
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S | FileCheck %s | |||
|
|||
; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]] | |||
; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } | |||
; old: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unintentional test change? (the opt command doesn't run function-attrs)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for reminding! Our PR causes this test failed and I'm not sure why.
Reached out to the owner of this test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the linkage number as suggested by the owner and fixed this test!
Thank you all for the comments! Addressed all except only one comment about "memprof_internal_linkage.ll". |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
will review more thoroughly soon, one thing jumped out at me
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/8666 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/154/builds/7675 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/46/builds/8113 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/9740 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/11/builds/8404 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/108/builds/6096 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/23/builds/4962 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/95/builds/6373 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/35/builds/3798 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/5670 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/13068 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/12508 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/76/builds/4622 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/41/builds/3535 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/164/builds/4704 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/52/builds/3831 Here is the relevant piece of the build log for the reference
|
@haopliu : Many clang lit tests fail with this patch
|
…7373)" This reverts commit 661c593. Multiple buildbot failures, e.g. https://lab.llvm.org/buildbot/#/builders/108/builds/6096
Add the "initializes" attribute inference. This change is expected to have ~0.09% compile time regression, which seems acceptable for interprocedural DSE. https://llvm-compile-time-tracker.com/compare.php?from=9f10252c4ad7cffbbcf692fa9c953698f82ac4f5&to=56345c1cee4375eb5c28b8e7abf4803d20216b3b&stat=instructions%3Au
…vm#97373)" This reverts commit 661c593. Multiple buildbot failures, e.g. https://lab.llvm.org/buildbot/#/builders/108/builds/6096
…lvm#116825) Reverts llvm#97373 clang tests fail
reland #97373 after fixing clang tests. Confirmed with "ninja check-llvm" and "ninja check-clang"
Tested with an internal search backend loadtest. With `-ftrivial-auto-var-init`, this work has a 0.2%-0.3% total QPS improvement. Note that, the metric is total QPS instead of cpu-time, even 1% improvement means a lot. - Add the "initializes" attr: llvm#97373 - Apply the attr to DSE: llvm#107282
…lvm#116825) Reverts llvm#97373 clang tests fail
Add the "initializes" attribute inference.
This change is expected to have ~0.09% compile time regression, which seems acceptable for interprocedural DSE.
https://llvm-compile-time-tracker.com/compare.php?from=9f10252c4ad7cffbbcf692fa9c953698f82ac4f5&to=56345c1cee4375eb5c28b8e7abf4803d20216b3b&stat=instructions%3Au