-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DSE] Apply initializes attribute to DSE #107282
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Haopeng Liu (haopliu) ChangesApply the initializes attribute to DSE and guard with a flag, "enable-dse-initializes-attr-improvement". The attribute support has been landed in: #84803 Full diff: https://github.com/llvm/llvm-project/pull/107282.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a37f295abbd31c..3ccb064adbf0df 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -164,6 +165,10 @@ static cl::opt<bool>
OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
cl::desc("Allow DSE to optimize memory accesses."));
+static cl::opt<bool> EnableInitializesImprovement(
+ "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden,
+ cl::desc("Enable the initializes attr improvement in DSE"));
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
@@ -809,8 +814,10 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`.
struct MemoryLocationWrapper {
- MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef)
- : MemLoc(MemLoc), MemDef(MemDef) {
+ MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
+ bool DefByInitializesAttr)
+ : MemLoc(MemLoc), MemDef(MemDef),
+ DefByInitializesAttr(DefByInitializesAttr) {
assert(MemLoc.Ptr && "MemLoc should be not null");
UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
DefInst = MemDef->getMemoryInst();
@@ -820,20 +827,121 @@ struct MemoryLocationWrapper {
const Value *UnderlyingObject;
MemoryDef *MemDef;
Instruction *DefInst;
+ bool DefByInitializesAttr = false;
};
// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
// defined by this MemoryDef.
struct MemoryDefWrapper {
- MemoryDefWrapper(MemoryDef *MemDef, std::optional<MemoryLocation> MemLoc) {
+ MemoryDefWrapper(
+ MemoryDef *MemDef,
+ const SmallVectorImpl<std::pair<MemoryLocation, bool>> &MemLocations) {
DefInst = MemDef->getMemoryInst();
- if (MemLoc.has_value())
- DefinedLocation = MemoryLocationWrapper(*MemLoc, MemDef);
+ for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
+ DefinedLocations.push_back(
+ MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
}
Instruction *DefInst;
- std::optional<MemoryLocationWrapper> DefinedLocation = std::nullopt;
+ SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
+};
+
+bool HasInitializesAttr(Instruction *I) {
+ CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB)
+ return false;
+
+ for (size_t Idx = 0; Idx < CB->arg_size(); Idx++)
+ if (CB->paramHasAttr(Idx, Attribute::Initializes))
+ return true;
+ return false;
+}
+
+struct ArgumentInitInfo {
+ size_t Idx = -1;
+ ConstantRangeList Inits;
+ bool HasDeadOnUnwindAttr = false;
+ bool FuncHasNoUnwindAttr = false;
};
+ConstantRangeList
+GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
+ if (Args.empty())
+ return {};
+
+ // To address unwind, the function should have nounwind attribute or the
+ // arguments have dead_on_unwind attribute. Otherwise, return empty.
+ for (const auto &Arg : Args) {
+ if (!Arg.FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
+ return {};
+ if (Arg.Inits.empty())
+ return {};
+ }
+
+ if (Args.size() == 1)
+ return Args[0].Inits;
+
+ ConstantRangeList MergedIntervals = Args[0].Inits;
+ for (size_t i = 1; i < Args.size(); i++)
+ MergedIntervals = MergedIntervals.intersectWith(Args[i].Inits);
+
+ return MergedIntervals;
+}
+
+// Return the locations wrote by the initializes attribute.
+// Note that this function considers:
+// 1. Unwind edge: apply "initializes" attribute only if the callee has
+// "nounwind" attribute or the argument has "dead_on_unwind" attribute.
+// 2. Argument alias: for aliasing arguments, the "initializes" attribute is
+// the merged range list of their "initializes" attributes.
+SmallVector<MemoryLocation, 1>
+GetInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
+ const CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB)
+ return {};
+
+ // Collect aliasing arguments and their initializes ranges.
+ bool HasNoUnwindAttr = CB->hasFnAttr(Attribute::NoUnwind);
+ SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
+ for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) {
+ ConstantRangeList Inits;
+ if (CB->paramHasAttr(Idx, Attribute::Initializes))
+ Inits = CB->getParamAttr(Idx, Attribute::Initializes)
+ .getValueAsConstantRangeList();
+
+ bool HasDeadOnUnwindAttr = CB->paramHasAttr(Idx, Attribute::DeadOnUnwind);
+ ArgumentInitInfo InitInfo{Idx, Inits, HasDeadOnUnwindAttr, HasNoUnwindAttr};
+ Value *CurArg = CB->getArgOperand(Idx);
+ bool FoundAliasing = false;
+ for (auto &[Arg, AliasList] : Arguments) {
+ if (BatchAA.isMustAlias(Arg, CurArg)) {
+ FoundAliasing = true;
+ AliasList.push_back(InitInfo);
+ }
+ }
+ if (!FoundAliasing)
+ Arguments[CurArg] = {InitInfo};
+ }
+
+ SmallVector<MemoryLocation, 1> Locations;
+ for (const auto &[_, Args] : Arguments) {
+ auto MergedInitAttr = GetMergedInitAttr(Args);
+ if (MergedInitAttr.empty())
+ continue;
+
+ for (const auto &Arg : Args) {
+ for (const auto &Range : MergedInitAttr) {
+ int64_t Start = Range.getLower().getSExtValue();
+ int64_t End = Range.getUpper().getSExtValue();
+ if (Start == 0)
+ Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
+ LocationSize::precise(End - Start),
+ CB->getAAMetadata()));
+ }
+ }
+ }
+ return Locations;
+}
+
struct DSEState {
Function &F;
AliasAnalysis &AA;
@@ -911,7 +1019,8 @@ struct DSEState {
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
- (getLocForWrite(&I) || isMemTerminatorInst(&I)))
+ (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
+ HasInitializesAttr(&I)))
MemDefs.push_back(MD);
}
}
@@ -1147,13 +1256,26 @@ struct DSEState {
return MemoryLocation::getOrNone(I);
}
- std::optional<MemoryLocation> getLocForInst(Instruction *I) {
+ // Returns a list of <MemoryLocation, bool> pairs wrote by I.
+ // The bool means whether the write is from Initializes attr.
+ SmallVector<std::pair<MemoryLocation, bool>, 1>
+ getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
+ SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
if (isMemTerminatorInst(I)) {
- if (auto Loc = getLocForTerminator(I)) {
- return Loc->first;
+ if (auto Loc = getLocForTerminator(I))
+ Locations.push_back(std::make_pair(Loc->first, false));
+ return Locations;
+ }
+
+ if (auto Loc = getLocForWrite(I))
+ Locations.push_back(std::make_pair(*Loc, false));
+
+ if (ConsiderInitializesAttr) {
+ for (auto &MemLoc : GetInitializesArgMemLoc(I, BatchAA)) {
+ Locations.push_back(std::make_pair(MemLoc, true));
}
}
- return getLocForWrite(I);
+ return Locations;
}
/// Assuming this instruction has a dead analyzable write, can we delete
@@ -1365,7 +1487,8 @@ struct DSEState {
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
const MemoryLocation &KillingLoc, const Value *KillingUndObj,
unsigned &ScanLimit, unsigned &WalkerStepLimit,
- bool IsMemTerm, unsigned &PartialLimit) {
+ bool IsMemTerm, unsigned &PartialLimit,
+ bool IsInitializesAttrMemLoc) {
if (ScanLimit == 0 || WalkerStepLimit == 0) {
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
return std::nullopt;
@@ -1602,7 +1725,17 @@ struct DSEState {
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
- if (isReadClobber(MaybeDeadLoc, UseInst)) {
+ // If KillingDef is a CallInst with "initializes" attribute, the reads in
+ // Callee would be dominated by initializations, so this should be safe.
+ bool IsKillingDefFromInitAttr = false;
+ if (IsInitializesAttrMemLoc) {
+ if (KillingI == UseInst &&
+ KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr)) {
+ IsKillingDefFromInitAttr = true;
+ }
+ }
+
+ if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return std::nullopt;
}
@@ -2207,7 +2340,8 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
- isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit);
+ isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
+ KillingLocWrapper.DefByInitializesAttr);
if (!MaybeDeadAccess) {
LLVM_DEBUG(dbgs() << " finished walk\n");
@@ -2232,8 +2366,11 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
}
MemoryDefWrapper DeadDefWrapper(
cast<MemoryDef>(DeadAccess),
- getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst()));
- MemoryLocationWrapper &DeadLocWrapper = *DeadDefWrapper.DefinedLocation;
+ getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
+ /*ConsiderInitializesAttr=*/false));
+ assert(DeadDefWrapper.DefinedLocations.size() == 1);
+ MemoryLocationWrapper &DeadLocWrapper =
+ DeadDefWrapper.DefinedLocations.front();
LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
NumGetDomMemoryDefPassed++;
@@ -2311,37 +2448,41 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
}
bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
- if (!KillingDefWrapper.DefinedLocation.has_value()) {
+ if (KillingDefWrapper.DefinedLocations.empty()) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
<< *KillingDefWrapper.DefInst << "\n");
return false;
}
- auto &KillingLocWrapper = *KillingDefWrapper.DefinedLocation;
- LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
- << *KillingLocWrapper.MemDef << " ("
- << *KillingLocWrapper.DefInst << ")\n");
- auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
-
- // Check if the store is a no-op.
- if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
- KillingLocWrapper.UnderlyingObject)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
- << *KillingLocWrapper.DefInst << '\n');
- deleteDeadInstruction(KillingLocWrapper.DefInst);
- NumRedundantStores++;
- return true;
- }
- // Can we form a calloc from a memset/malloc pair?
- if (!DeletedKillingLoc &&
- tryFoldIntoCalloc(KillingLocWrapper.MemDef,
- KillingLocWrapper.UnderlyingObject)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
- << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
- deleteDeadInstruction(KillingLocWrapper.DefInst);
- return true;
+ bool MadeChange = false;
+ for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
+ LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
+ << *KillingLocWrapper.MemDef << " ("
+ << *KillingLocWrapper.DefInst << ")\n");
+ auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
+
+ // Check if the store is a no-op.
+ if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
+ KillingLocWrapper.UnderlyingObject)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
+ << *KillingLocWrapper.DefInst << '\n');
+ deleteDeadInstruction(KillingLocWrapper.DefInst);
+ NumRedundantStores++;
+ MadeChange = true;
+ continue;
+ }
+ // Can we form a calloc from a memset/malloc pair?
+ if (!DeletedKillingLoc &&
+ tryFoldIntoCalloc(KillingLocWrapper.MemDef,
+ KillingLocWrapper.UnderlyingObject)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
+ << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
+ deleteDeadInstruction(KillingLocWrapper.DefInst);
+ MadeChange = true;
+ continue;
+ }
}
- return Changed;
+ return MadeChange;
}
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
@@ -2357,7 +2498,8 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
continue;
MemoryDefWrapper KillingDefWrapper(
- KillingDef, State.getLocForInst(KillingDef->getMemoryInst()));
+ KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
+ EnableInitializesImprovement));
MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
}
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
new file mode 100644
index 00000000000000..c4ff69af9051bc
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=function-attrs,dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
+
+declare void @p1_write_only(ptr nocapture noundef writeonly initializes((0, 2)) dead_on_unwind)
+declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p2_same_range(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p2_no_init(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef dead_on_unwind)
+declare void @p2_no_dead_on_unwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)))
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_only_caller() {
+; CHECK-LABEL: @p1_write_only_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @p1_write_only(ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p1_write_only(ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_then_read_caller() {
+; CHECK-LABEL: @p1_write_then_read_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @p1_write_then_read(ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p1_write_then_read(ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_nonalias_caller() {
+; CHECK-LABEL: @p2_same_range_nonalias_caller(
+; CHECK-NEXT: %ptr1 = alloca i16, align 2
+; CHECK-NEXT: %ptr2 = alloca i16, align 2
+; CHECK-NEXT: call void @p2_same_range(ptr %ptr1, ptr %ptr2)
+; CHECK-NEXT: %l = load i16, ptr %ptr1
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr1 = alloca i16
+ %ptr2 = alloca i16
+ store i16 0, ptr %ptr1
+ store i16 0, ptr %ptr2
+ call void @p2_same_range(ptr %ptr1, ptr %ptr2)
+ %l = load i16, ptr %ptr1
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_alias_caller() {
+; CHECK-LABEL: @p2_same_range_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @p2_same_range(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p2_same_range(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_init_alias_caller() {
+; CHECK-LABEL: @p2_no_init_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: store i16 0, ptr %ptr
+; CHECK-NEXT: call void @p2_no_init(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p2_no_init(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_dead_on_unwind_alias_caller() {
+; CHECK-LABEL: @p2_no_dead_on_unwind_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: store i16 0, ptr %ptr
+; CHECK-NEXT: call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
+declare void @large_p1(ptr nocapture noundef initializes((0, 200))) nounwind
+declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocapture noundef initializes((0, 100))) nounwind
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p1_caller() {
+; CHECK-LABEL: @large_p1_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @large_p1(ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 100, i1 false)
+ call void @large_p1(ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_nonalias_caller() {
+; CHECK-LABEL: @large_p2_nonalias_caller(
+; CHECK-NEXT: %ptr1 = alloca i16, align 2
+; CHECK-NEXT: %ptr2 = alloca i16, align 2
+; CHECK-NEXT: call void @large_p2(ptr %ptr1, ptr %ptr2)
+; CHECK-NEXT: %l = load i16, ptr %ptr1
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr1 = alloca i16
+ %ptr2 = alloca i16
+ call void @llvm.memset.p0.i64(ptr %ptr1, i8 42, i64 200, i1 false)
+ call void @llvm.memset.p0.i64(ptr %ptr2, i8 42, i64 100, i1 false)
+ call void @large_p2(ptr %ptr1, ptr %ptr2)
+ %l = load i16, ptr %ptr1
+ ret i16 %l
+}
+
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_alias_caller() {
+; CHECK-LABEL: @large_p2_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: %1 = getelementptr inbounds i8, ptr %ptr, i64 100
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 %1, i8 42, i64 200, i1 false)
+; CHECK-NEXT: call void @large_p2(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 300, i1 false)
+ call void @large_p2(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
|
@@ -0,0 +1,159 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | |||
; RUN: opt < %s -aa-pipeline=basic-aa -passes=function-attrs,dse -enable-dse-initializes-attr-improvement -S | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is aa-pipeline
necessary?
also this shouldn't be running function-attrs
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, you are right. Removed the aa-pipeline
and function-attrs
.
/*ConsiderInitializesAttr=*/false)); | ||
assert(DeadDefWrapper.DefinedLocations.size() == 1); | ||
MemoryLocationWrapper &DeadLocWrapper = | ||
DeadDefWrapper.DefinedLocations.front(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not 100% sure what's going on here, but it seems weird that we have two modes for MemoryDefWrapper
, a single MemoryLocation version here and a multiple MemoryLocation below. is there any way to make this a little less hacked together? why does this have to be a single MemoryLocation?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We cannot apply the initializes
attribute to DeadAccess/DeadDef since it would consider a call instruction as dead store and remove it incorrectly. Added a comment to explain it. Any suggestions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
after staring at this a bit, I believe this preexisting code is conflating two things: it's assuming that if there is a memory location that DeadDefWrapper
writes to that overlaps with KillingLocWrapper
, it must have no other side effects and be deletable. this happens to be true for stores and libcalls like strcpy
that are handled here, but is not necessarily true in general.
I think ideally we change isRemovable
to be more accurate about arbitrary function calls, and check that here, but I'm ok with a TODO saying something like `TODO: this conflates the existence of a MemoryLocation with being able to delete the instruction. fix isRemovable() to consider calls with side effects that cannot be removed, e.g. calls with the initializes attribute, and remove getLocForInst(ConsiderInitializesAttr = false) workaround
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this still needs a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ooh, missed this comment. Added a TODO about isRemovable(). Thanks!
return Args[0].Inits; | ||
|
||
ConstantRangeList MergedIntervals = Args[0].Inits; | ||
for (size_t i = 1; i < Args.size(); i++) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
stylish nit:
https://llvm.org/docs/CodingStandards.html#don-t-evaluate-end-every-time-through-a-loop
and capital variable names:
for (size_t I = 0, Count = Args.size(); I < Count; ++I)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for this info! Will keep these style rules in mind.
if (!CB) | ||
return false; | ||
|
||
for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same here and ++Idx
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. Thanks!
// Collect aliasing arguments and their initializes ranges. | ||
bool HasNoUnwindAttr = CB->hasFnAttr(Attribute::NoUnwind); | ||
SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments; | ||
for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
again
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
return false; | ||
|
||
for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) | ||
if (CB->paramHasAttr(Idx, Attribute::Initializes)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is an AttributeList::.hasAttrSomewhere
API to do this check efficiently.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correct me if I'm wrong. Different from CallBase::paramHasAttr
, AttributeList::.hasAttrSomewhere
API does not look into the called function's parameter list so we cannot apply this API here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you need to call it for both the call and function attribute list. Or add a new CallBase API that does this for you.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CallBase::getArgOperandWithAttribute
is exactly what we need (call AttributeList::.hasAttrSomewhere
with both the instruction attr and the called function attr). Used this API :-D
Value *CurArg = CB->getArgOperand(Idx); | ||
bool FoundAliasing = false; | ||
for (auto &[Arg, AliasList] : Arguments) { | ||
if (BatchAA.isMustAlias(Arg, CurArg)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This handle MustAlias arguments, but what about arguments that MayAlias or PartialAlias?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Conservatively handle May-/Partial-Alias same as MustAlias.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just handling them the same as MustAlias isn't correct. If it's PartialAlias then there is an offset between the arguments, so initializes refers to different offsets. And if it's MayAlias, then there may be an unknown offset, or the arguments may be unrelated entirely.
For the PartialAlias/MayAlias cases we should discard the initializes information entirely.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, thanks for the reminder. Make sense! Updated to insert an empty range for May-/Partial-Alias. This empty range would discard the entire initializes info later while intersecting the ranges among all aliasing args.
} | ||
|
||
if (auto Loc = getLocForWrite(I)) | ||
Locations.push_back(std::make_pair(*Loc, false)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we return here or does this need to fall through?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Changed it to early return.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reverted the latest change. We need to fall through. For a call instruction, getLocForWrite may return a memory-location with imprecise size. Then, fall through to check the initializes attr.
bool IsKillingDefFromInitAttr = false; | ||
if (IsInitializesAttrMemLoc) { | ||
if (KillingI == UseInst && | ||
KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like this inner if is untested (test pass if I replace the condition with true).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice catch! Added a new unit test, p1_write_then_read_caller_with_clobber
, to test this inner condition.
What does the compile-time impact for this look like? |
I think this PR (enable the flag) has a negligible compile-time impact. The diff between this PR (enable the flag) and adding the attribute inference is 0. We guard the change with a flag to confirm everything is expected after landing all, then enable this flag. |
…nitializesAttr is true or false
for (auto &[Arg, AliasList] : Arguments) { | ||
if (BatchAA.isNoAlias(Arg, CurArg)) { | ||
continue; | ||
} else if (BatchAA.isMustAlias(Arg, CurArg)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should call alias() only once and then check the result.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Correct me if I'm wrong). Here checks two Value*
aliasing: we need to convert them to two MemoryLocation
, then call alias(). However, it seems that isMustAlias()
and isNoAlias()
convert in different ways: MemoryLocation(V, LocationSize::precise(1))
VS. MemoryLocation::getBeforeOrAfter(V)
. Then we have to call alias() twice?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For MustAlias the size does not matter. Using getBeforeOrAfter() is always a safe option.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. Updated to call BatchAA.alias()
using getBeforeOrAfter()
once, then check AAR. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! LG to me, but defer to other reviewers for approval.
I think aeubanks had one unresolved comment about a TODO comment?
return {}; | ||
|
||
// To address unwind, the function should have nounwind attribute or the | ||
// arguments have dead_on_unwind attribute. Otherwise, return empty. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: can you update the comment "the arguments have dead_on_unwind attribute"? I think now it is "dead or invisible on unwind"? Similar on line 862, and maybe the field "IsDeadOnUnwind" could be "IsDeadOrInvisibleOnUnwind"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with remaining comments addressed
Thank you all!
Thanks for the reminder! Done. |
@nikic any more comments? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't re-review in detail, but looks ok from a quick look.
@@ -1147,13 +1196,26 @@ struct DSEState { | |||
return MemoryLocation::getOrNone(I); | |||
} | |||
|
|||
std::optional<MemoryLocation> getLocForInst(Instruction *I) { | |||
// Returns a list of <MemoryLocation, bool> pairs wrote by I. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Returns a list of <MemoryLocation, bool> pairs wrote by I. | |
// Returns a list of <MemoryLocation, bool> pairs written by I. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
// which means no unwind edges from this call in the current function. | ||
bool IsDeadOrInvisibleOnUnwind = | ||
CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) || | ||
(isInvisibleToCallerOnUnwind(CurArg) && isa<CallInst>(CB)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(isInvisibleToCallerOnUnwind(CurArg) && isa<CallInst>(CB)); | |
(isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg)); |
Cheaper check first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
return CB != nullptr && | ||
CB->getArgOperandWithAttribute(Attribute::Initializes) != nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe more readable?
return CB != nullptr && | |
CB->getArgOperandWithAttribute(Attribute::Initializes) != nullptr; | |
return CB && CB->getArgOperandWithAttribute(Attribute::Initializes); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
@@ -1602,7 +1665,16 @@ struct DSEState { | |||
|
|||
// Uses which may read the original MemoryDef mean we cannot eliminate the | |||
// original MD. Stop walk. | |||
if (isReadClobber(MaybeDeadLoc, UseInst)) { | |||
// If KillingDef is a CallInst with "initializes" attribute, the reads in | |||
// Callee would be dominated by initializations, so this should be safe. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Callee would be dominated by initializations, so this should be safe. | |
// the callee would be dominated by initializations, so this should be safe. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
// Note that this function considers: | ||
// 1. Unwind edge: use "initializes" attribute only if the callee has | ||
// "nounwind" attribute, or the argument has "dead_on_unwind" attribute, | ||
// or the argument is invisble to caller on unwind. That is, we don't |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// or the argument is invisble to caller on unwind. That is, we don't | |
// or the argument is invisible to caller on unwind. That is, we don't |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
Value *CurArg = CB->getArgOperand(Idx); | ||
// We don't perform incorrect DSE on unwind edges in the current function, | ||
// and use the "initializes" attribute to kill dead stores if: | ||
// - The call does not throw exceptions, "CB->doesNotThrow()". |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is nounwind checked in getIntersectedInitRangeList, no need to check it in within IsDeadOrInvisibleOnUnwind as the others?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IsDeadOrInvisibleOnUnwind
is per argument nounwind.
getIntersectedInitRangeList
considers IsDeadOrInvisibleOnUnwind
per argument and CB->doesNotThrow() together.
// - Or the argument is invisible to caller on unwind, and CB isa<CallInst> | ||
// which means no unwind edges from this call in the current function. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe nit: should be implicit that it's not an Invoke if there are no unwind edges?
// - Or the argument is invisible to caller on unwind, and CB isa<CallInst> | |
// which means no unwind edges from this call in the current function. | |
// - Or the argument is invisible to the caller on unwind, and it's a pure function call, i.e., there are no | |
// unwind edges from this call in the current function. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't heard the term "pure function call" refer to a call that doesn't unwind, I prefer something closer to the current version.
Or the argument is invisible to caller on unwind, and there are no unwind edges from this call in the current function (e.g. `CallInst`).
There are probably (?) more cases than just CallInst
where we do this optimization so I use e.g.
instead of i.e.
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you both, done!
Thank you all! I'm going to merge this PR. |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/7648 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/187/builds/2087 Here is the relevant piece of the build log for the reference
|
@haopliu : As the build bots say, many lit tests fail when I've compiled with EXPENSIVE_CHECKS on. |
This reverts commit 089237c.
The revert went to a branch instead of |
Reverts #107282 Seems to be causing invalid analysis caching as mentioned in #107282 (comment).
reverted in #113589 |
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " | ||
<< *KillingLocWrapper.MemDef << " (" | ||
<< *KillingLocWrapper.DefInst << ")\n"); | ||
auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
looks like we're not taking this Changed
into account
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, nice catch! Is there a way to launch an offline buildbot run to validate? https://lab.llvm.org/buildbot/
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you repro locally with the same cmake flags as the bot?
E.g., -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON
in this step https://lab.llvm.org/buildbot/#/builders/16/builds/7648/steps/4/logs/stdio ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup, I repro the failure locally and confirmed that MadeChange |= Changed;
works.
Will retry this PR!
retry #107282 Fixed with `MadeChange |= Changed;` and confirmed it works. ``` cmake -DLLVM_CCACHE_BUILD=ON -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON -DLLVM_ENABLE_WERROR=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS=-U_GLIBCXX_DEBUG '-DLLVM_LIT_ARGS=-v -vv -j96' '-DLLVM_ENABLE_PROJECTS=llvm;lld' -DLLVM_ENABLE_ASSERTIONS=ON -GNinja ../llvm ninja check-llvm ```
Re-merged this PR in #113630 with a fix. |
Apply the initializes attribute to DSE and guard with a flag, "enable-dse-initializes-attr-improvement". The attribute support has been landed in: llvm#84803 The attribute inference will be landed after this PR: llvm#97373
Reverts llvm#107282 Seems to be causing invalid analysis caching as mentioned in llvm#107282 (comment).
retry llvm#107282 Fixed with `MadeChange |= Changed;` and confirmed it works. ``` cmake -DLLVM_CCACHE_BUILD=ON -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON -DLLVM_ENABLE_WERROR=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS=-U_GLIBCXX_DEBUG '-DLLVM_LIT_ARGS=-v -vv -j96' '-DLLVM_ENABLE_PROJECTS=llvm;lld' -DLLVM_ENABLE_ASSERTIONS=ON -GNinja ../llvm ninja check-llvm ```
Tested with an internal search backend loadtest. With `-ftrivial-auto-var-init`, this work has a 0.2%-0.3% total QPS improvement. Note that, the metric is total QPS instead of cpu-time, even 1% improvement means a lot. - Add the "initializes" attr: llvm#97373 - Apply the attr to DSE: llvm#107282
Apply the initializes attribute to DSE and guard with a flag, "enable-dse-initializes-attr-improvement".
The attribute support has been landed in: #84803
The attribute inference will be landed after this PR: #97373