diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index a37f295abbd31c..1f9c9588659ce1 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRangeList.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -164,6 +165,11 @@ static cl::opt OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses.")); +// TODO: turn on and remove this flag. +static cl::opt EnableInitializesImprovement( + "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden, + cl::desc("Enable the initializes attr improvement in DSE")); + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// @@ -809,8 +815,10 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { // A memory location wrapper that represents a MemoryLocation, `MemLoc`, // defined by `MemDef`. struct MemoryLocationWrapper { - MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef) - : MemLoc(MemLoc), MemDef(MemDef) { + MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef, + bool DefByInitializesAttr) + : MemLoc(MemLoc), MemDef(MemDef), + DefByInitializesAttr(DefByInitializesAttr) { assert(MemLoc.Ptr && "MemLoc should be not null"); UnderlyingObject = getUnderlyingObject(MemLoc.Ptr); DefInst = MemDef->getMemoryInst(); @@ -820,20 +828,59 @@ struct MemoryLocationWrapper { const Value *UnderlyingObject; MemoryDef *MemDef; Instruction *DefInst; + bool DefByInitializesAttr = false; }; // A memory def wrapper that represents a MemoryDef and the MemoryLocation(s) // defined by this MemoryDef. 
struct MemoryDefWrapper { - MemoryDefWrapper(MemoryDef *MemDef, std::optional MemLoc) { + MemoryDefWrapper(MemoryDef *MemDef, + ArrayRef> MemLocations) { DefInst = MemDef->getMemoryInst(); - if (MemLoc.has_value()) - DefinedLocation = MemoryLocationWrapper(*MemLoc, MemDef); + for (auto &[MemLoc, DefByInitializesAttr] : MemLocations) + DefinedLocations.push_back( + MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr)); } Instruction *DefInst; - std::optional DefinedLocation = std::nullopt; + SmallVector DefinedLocations; +}; + +bool hasInitializesAttr(Instruction *I) { + CallBase *CB = dyn_cast(I); + return CB && CB->getArgOperandWithAttribute(Attribute::Initializes); +} + +struct ArgumentInitInfo { + unsigned Idx; + bool IsDeadOrInvisibleOnUnwind; + ConstantRangeList Inits; }; +// Return the intersected range list of the initializes attributes of "Args". +// "Args" are call arguments that alias to each other. +// If any argument in "Args" doesn't have dead_on_unwind attr and +// "CallHasNoUnwindAttr" is false, return empty. +ConstantRangeList getIntersectedInitRangeList(ArrayRef Args, + bool CallHasNoUnwindAttr) { + if (Args.empty()) + return {}; + + // To address unwind, the function should have nounwind attribute or the + // arguments have dead or invisible on unwind. Otherwise, return empty. 
+ for (const auto &Arg : Args) { + if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind) + return {}; + if (Arg.Inits.empty()) + return {}; + } + + ConstantRangeList IntersectedIntervals = Args.front().Inits; + for (auto &Arg : Args.drop_front()) + IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits); + + return IntersectedIntervals; +} + struct DSEState { Function &F; AliasAnalysis &AA; @@ -911,7 +958,8 @@ struct DSEState { auto *MD = dyn_cast_or_null(MA); if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit && - (getLocForWrite(&I) || isMemTerminatorInst(&I))) + (getLocForWrite(&I) || isMemTerminatorInst(&I) || + (EnableInitializesImprovement && hasInitializesAttr(&I)))) MemDefs.push_back(MD); } } @@ -1147,13 +1195,26 @@ struct DSEState { return MemoryLocation::getOrNone(I); } - std::optional getLocForInst(Instruction *I) { + // Returns a list of pairs written by I. + // The bool means whether the write is from Initializes attr. + SmallVector, 1> + getLocForInst(Instruction *I, bool ConsiderInitializesAttr) { + SmallVector, 1> Locations; if (isMemTerminatorInst(I)) { - if (auto Loc = getLocForTerminator(I)) { - return Loc->first; + if (auto Loc = getLocForTerminator(I)) + Locations.push_back(std::make_pair(Loc->first, false)); + return Locations; + } + + if (auto Loc = getLocForWrite(I)) + Locations.push_back(std::make_pair(*Loc, false)); + + if (ConsiderInitializesAttr) { + for (auto &MemLoc : getInitializesArgMemLoc(I)) { + Locations.push_back(std::make_pair(MemLoc, true)); } } - return getLocForWrite(I); + return Locations; } /// Assuming this instruction has a dead analyzable write, can we delete @@ -1365,7 +1426,8 @@ struct DSEState { getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, const MemoryLocation &KillingLoc, const Value *KillingUndObj, unsigned &ScanLimit, unsigned &WalkerStepLimit, - bool IsMemTerm, unsigned &PartialLimit) { + bool IsMemTerm, unsigned &PartialLimit, + bool IsInitializesAttrMemLoc) { if 
(ScanLimit == 0 || WalkerStepLimit == 0) { LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); return std::nullopt; @@ -1602,7 +1664,16 @@ struct DSEState { // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. - if (isReadClobber(MaybeDeadLoc, UseInst)) { + // If KillingDef is a CallInst with "initializes" attribute, the reads in + // the callee would be dominated by initializations, so it should be safe. + bool IsKillingDefFromInitAttr = false; + if (IsInitializesAttrMemLoc) { + if (KillingI == UseInst && + KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr)) + IsKillingDefFromInitAttr = true; + } + + if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) { LLVM_DEBUG(dbgs() << " ... found read clobber\n"); return std::nullopt; } @@ -2170,6 +2241,16 @@ struct DSEState { return MadeChange; } + // Return the locations written by the initializes attribute. + // Note that this function considers: + // 1. Unwind edge: use "initializes" attribute only if the callee has + // "nounwind" attribute, or the argument has "dead_on_unwind" attribute, + // or the argument is invisible to caller on unwind. That is, we don't + // perform incorrect DSE on unwind edges in the current function. + // 2. Argument alias: for aliasing arguments, the "initializes" attribute is + // the intersected range list of their "initializes" attributes. + SmallVector getInitializesArgMemLoc(const Instruction *I); + // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are // killed by `KillingLocWrapper.MemDef`. Return whether // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted. @@ -2181,6 +2262,75 @@ struct DSEState { bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper); }; +SmallVector +DSEState::getInitializesArgMemLoc(const Instruction *I) { + const CallBase *CB = dyn_cast(I); + if (!CB) + return {}; + + // Collect aliasing arguments and their initializes ranges. 
+ SmallMapVector, 2> Arguments; + for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) { + ConstantRangeList Inits; + Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes); + if (InitializesAttr.isValid()) + Inits = InitializesAttr.getValueAsConstantRangeList(); + + Value *CurArg = CB->getArgOperand(Idx); + // We don't perform incorrect DSE on unwind edges in the current function, + // and use the "initializes" attribute to kill dead stores if: + // - The call does not throw exceptions, "CB->doesNotThrow()". + // - Or the callee parameter has "dead_on_unwind" attribute. + // - Or the argument is invisible to caller on unwind, and there are no + // unwind edges from this call in the current function (e.g. `CallInst`). + bool IsDeadOrInvisibleOnUnwind = + CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) || + (isa(CB) && isInvisibleToCallerOnUnwind(CurArg)); + ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits}; + bool FoundAliasing = false; + for (auto &[Arg, AliasList] : Arguments) { + auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg), + MemoryLocation::getBeforeOrAfter(CurArg)); + if (AAR == AliasResult::NoAlias) { + continue; + } else if (AAR == AliasResult::MustAlias) { + FoundAliasing = true; + AliasList.push_back(InitInfo); + } else { + // For PartialAlias and MayAlias, there is an offset or may be an + // unknown offset between the arguments and we insert an empty init + // range to discard the entire initializes info while intersecting. 
+ FoundAliasing = true; + AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind, + ConstantRangeList()}); + } + } + if (!FoundAliasing) + Arguments[CurArg] = {InitInfo}; + } + + SmallVector Locations; + for (const auto &[_, Args] : Arguments) { + auto IntersectedRanges = + getIntersectedInitRangeList(Args, CB->doesNotThrow()); + if (IntersectedRanges.empty()) + continue; + + for (const auto &Arg : Args) { + for (const auto &Range : IntersectedRanges) { + int64_t Start = Range.getLower().getSExtValue(); + int64_t End = Range.getUpper().getSExtValue(); + // For now, we only handle locations starting at offset 0. + if (Start == 0) + Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx), + LocationSize::precise(End - Start), + CB->getAAMetadata())); + } + } + } + return Locations; +} + std::pair DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) { bool Changed = false; @@ -2207,7 +2357,8 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) { std::optional MaybeDeadAccess = getDomMemoryDef( KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc, KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit, - isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit); + isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit, + KillingLocWrapper.DefByInitializesAttr); if (!MaybeDeadAccess) { LLVM_DEBUG(dbgs() << " finished walk\n"); @@ -2230,10 +2381,20 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) { } continue; } + // We cannot apply the initializes attribute to DeadAccess/DeadDef. + // It would incorrectly consider a call instruction as redundant store + // and remove this call instruction. + // TODO: this conflates the existence of a MemoryLocation with being able + // to delete the instruction. Fix isRemovable() to consider calls with + // side effects that cannot be removed, e.g. 
calls with the initializes + // attribute, and remove getLocForInst(ConsiderInitializesAttr = false). MemoryDefWrapper DeadDefWrapper( cast(DeadAccess), - getLocForInst(cast(DeadAccess)->getMemoryInst())); - MemoryLocationWrapper &DeadLocWrapper = *DeadDefWrapper.DefinedLocation; + getLocForInst(cast(DeadAccess)->getMemoryInst(), + /*ConsiderInitializesAttr=*/false)); + assert(DeadDefWrapper.DefinedLocations.size() == 1); + MemoryLocationWrapper &DeadLocWrapper = + DeadDefWrapper.DefinedLocations.front(); LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n"); ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess()); NumGetDomMemoryDefPassed++; @@ -2311,37 +2472,41 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) { } bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) { - if (!KillingDefWrapper.DefinedLocation.has_value()) { + if (KillingDefWrapper.DefinedLocations.empty()) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " << *KillingDefWrapper.DefInst << "\n"); return false; } - auto &KillingLocWrapper = *KillingDefWrapper.DefinedLocation; - LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " - << *KillingLocWrapper.MemDef << " (" - << *KillingLocWrapper.DefInst << ")\n"); - auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper); - - // Check if the store is a no-op. - if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef, - KillingLocWrapper.UnderlyingObject)) { - LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " - << *KillingLocWrapper.DefInst << '\n'); - deleteDeadInstruction(KillingLocWrapper.DefInst); - NumRedundantStores++; - return true; - } - // Can we form a calloc from a memset/malloc pair? 
- if (!DeletedKillingLoc && - tryFoldIntoCalloc(KillingLocWrapper.MemDef, - KillingLocWrapper.UnderlyingObject)) { - LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n" - << " DEAD: " << *KillingLocWrapper.DefInst << '\n'); - deleteDeadInstruction(KillingLocWrapper.DefInst); - return true; + bool MadeChange = false; + for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) { + LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " + << *KillingLocWrapper.MemDef << " (" + << *KillingLocWrapper.DefInst << ")\n"); + auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper); + MadeChange |= Changed; // Keep changes made while removing killed defs. + // Check if the store is a no-op. + if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef, + KillingLocWrapper.UnderlyingObject)) { + LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " + << *KillingLocWrapper.DefInst << '\n'); + deleteDeadInstruction(KillingLocWrapper.DefInst); + NumRedundantStores++; + MadeChange = true; + continue; + } + // Can we form a calloc from a memset/malloc pair?
+ if (!DeletedKillingLoc && + tryFoldIntoCalloc(KillingLocWrapper.MemDef, + KillingLocWrapper.UnderlyingObject)) { + LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n" + << " DEAD: " << *KillingLocWrapper.DefInst << '\n'); + deleteDeadInstruction(KillingLocWrapper.DefInst); + MadeChange = true; + continue; + } } - return Changed; + return MadeChange; } static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, @@ -2357,7 +2522,8 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, continue; MemoryDefWrapper KillingDefWrapper( - KillingDef, State.getLocForInst(KillingDef->getMemoryInst())); + KillingDef, State.getLocForInst(KillingDef->getMemoryInst(), + EnableInitializesImprovement)); MadeChange |= State.eliminateDeadDefs(KillingDefWrapper); } diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll new file mode 100644 index 00000000000000..d93da9b6612b05 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll @@ -0,0 +1,301 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=dse -enable-dse-initializes-attr-improvement -S | FileCheck %s + +declare void @p1_write_only(ptr nocapture noundef writeonly initializes((0, 2)) dead_on_unwind) +declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_on_unwind) +declare void @p1_clobber(ptr nocapture noundef) +declare void @p2_same_range(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)) dead_on_unwind) +declare void @p2_no_init(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef dead_on_unwind) +declare void @p2_no_dead_on_unwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2))) +declare void @p2_no_dead_on_unwind_but_nounwind(ptr nocapture noundef 
initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2))) nounwind + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p1_write_only_caller() { +; CHECK-LABEL: @p1_write_only_caller( +; CHECK-NEXT: [[PTR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: call void @p1_write_only(ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca i16 + store i16 0, ptr %ptr + call void @p1_write_only(ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p1_write_then_read_caller() { +; CHECK-LABEL: @p1_write_then_read_caller( +; CHECK-NEXT: [[PTR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: call void @p1_write_then_read(ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca i16 + store i16 0, ptr %ptr + call void @p1_write_then_read(ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p1_write_then_read_caller_with_clobber() { +; CHECK-LABEL: @p1_write_then_read_caller_with_clobber( +; CHECK-NEXT: [[PTR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store i16 0, ptr [[PTR]], align 2 +; CHECK-NEXT: call void @p1_clobber(ptr [[PTR]]) +; CHECK-NEXT: call void @p1_write_then_read(ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca i16 + store i16 0, ptr %ptr + call void @p1_clobber(ptr %ptr) + call void @p1_write_then_read(ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +declare void @p1_write_then_read_raw(ptr nocapture noundef initializes((0, 2))) +define i16 @p1_initializes_invoke() personality ptr undef { +; CHECK-LABEL: @p1_initializes_invoke( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store i16 0, ptr [[PTR]], align 2 +; CHECK-NEXT: invoke void @p1_write_then_read_raw(ptr [[PTR]]) +; CHECK-NEXT: 
to label [[BB1:%.*]] unwind label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: ret i16 0 +; CHECK: bb2: +; CHECK-NEXT: [[TMP:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; +entry: + %ptr = alloca i16 + store i16 0, ptr %ptr + invoke void @p1_write_then_read_raw(ptr %ptr) to label %bb1 unwind label %bb2 +bb1: + ret i16 0 +bb2: + %tmp = landingpad { ptr, i32 } + cleanup + %l = load i16, ptr %ptr + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p2_same_range_noalias_caller() { +; CHECK-LABEL: @p2_same_range_noalias_caller( +; CHECK-NEXT: [[PTR1:%.*]] = alloca i16, align 2 +; CHECK-NEXT: [[PTR2:%.*]] = alloca i16, align 2 +; CHECK-NEXT: call void @p2_same_range(ptr [[PTR1]], ptr [[PTR2]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR1]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr1 = alloca i16 + %ptr2 = alloca i16 + store i16 0, ptr %ptr1 + store i16 0, ptr %ptr2 + call void @p2_same_range(ptr %ptr1, ptr %ptr2) + %l = load i16, ptr %ptr1 + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p2_same_range_must_alias_caller() { +; CHECK-LABEL: @p2_same_range_must_alias_caller( +; CHECK-NEXT: [[PTR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: call void @p2_same_range(ptr [[PTR]], ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca i16 + store i16 0, ptr %ptr + call void @p2_same_range(ptr %ptr, ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p2_same_range_may_or_partial_alias_caller1(ptr %base, i1 %x) { +; CHECK-LABEL: @p2_same_range_may_or_partial_alias_caller1( +; CHECK-NEXT: [[BASEPLUS:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[X:%.*]], ptr [[BASEPLUS]], ptr [[BASE]] +; CHECK-NEXT: store i32 0, ptr [[BASE]], align 4 +; CHECK-NEXT: call 
void @p2_same_range(ptr [[BASE]], ptr [[SEL]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[BASE]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %baseplus = getelementptr i8, ptr %base, i64 1 + %sel = select i1 %x, ptr %baseplus, ptr %base + store i32 0, ptr %base + call void @p2_same_range(ptr %base, ptr %sel) + %l = load i16, ptr %base + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p2_same_range_may_or_partial_alias_caller2(ptr %base1, ptr %base2) { +; CHECK-LABEL: @p2_same_range_may_or_partial_alias_caller2( +; CHECK-NEXT: store i32 0, ptr [[BASE1:%.*]], align 4 +; CHECK-NEXT: call void @p2_same_range(ptr [[BASE1]], ptr [[BASE2:%.*]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[BASE1]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + store i32 0, ptr %base1 + call void @p2_same_range(ptr %base1, ptr %base2) + %l = load i16, ptr %base1 + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p2_no_init_alias_caller() { +; CHECK-LABEL: @p2_no_init_alias_caller( +; CHECK-NEXT: [[PTR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store i16 0, ptr [[PTR]], align 2 +; CHECK-NEXT: call void @p2_no_init(ptr [[PTR]], ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca i16 + store i16 0, ptr %ptr + call void @p2_no_init(ptr %ptr, ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +; Although the 2nd parameter of `p2_no_dead_on_unwind` doesn't have +; the 'dead_on_unwind' attribute, it's invisible to caller on unwind. +; DSE still uses the 'initializes' attribute and kills the dead store.
+; Function Attrs: mustprogress nounwind uwtable +define i16 @p2_no_dead_on_unwind_but_invisible_to_caller_alias_caller() { +; CHECK-LABEL: @p2_no_dead_on_unwind_but_invisible_to_caller_alias_caller( +; CHECK-NEXT: [[PTR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: call void @p2_no_dead_on_unwind(ptr [[PTR]], ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca i16 + store i16 0, ptr %ptr + call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p2_no_dead_on_unwind_alias_caller(ptr %ptr) { +; CHECK-LABEL: @p2_no_dead_on_unwind_alias_caller( +; CHECK-NEXT: store i16 0, ptr [[PTR:%.*]], align 2 +; CHECK-NEXT: call void @p2_no_dead_on_unwind(ptr [[PTR]], ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + store i16 0, ptr %ptr + call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @p2_no_dead_on_unwind_but_nounwind_alias_caller() { +; CHECK-LABEL: @p2_no_dead_on_unwind_but_nounwind_alias_caller( +; CHECK-NEXT: [[PTR:%.*]] = alloca i16, align 2 +; CHECK-NEXT: call void @p2_no_dead_on_unwind_but_nounwind(ptr [[PTR]], ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca i16 + store i16 0, ptr %ptr + call void @p2_no_dead_on_unwind_but_nounwind(ptr %ptr, ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind +declare void @large_p1(ptr nocapture noundef initializes((0, 200))) nounwind +declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocapture noundef initializes((0, 100))) nounwind + +; Function Attrs: mustprogress nounwind uwtable +define i16 @large_p1_caller() { +; CHECK-LABEL: @large_p1_caller( 
+; CHECK-NEXT: [[PTR:%.*]] = alloca [200 x i8], align 1 +; CHECK-NEXT: call void @large_p1(ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca [200 x i8] + call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 100, i1 false) + call void @large_p1(ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @large_p2_nonalias_caller() { +; CHECK-LABEL: @large_p2_nonalias_caller( +; CHECK-NEXT: [[PTR1:%.*]] = alloca [200 x i8], align 1 +; CHECK-NEXT: [[PTR2:%.*]] = alloca [100 x i8], align 1 +; CHECK-NEXT: call void @large_p2(ptr [[PTR1]], ptr [[PTR2]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR1]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr1 = alloca [200 x i8] + %ptr2 = alloca [100 x i8] + call void @llvm.memset.p0.i64(ptr %ptr1, i8 42, i64 200, i1 false) + call void @llvm.memset.p0.i64(ptr %ptr2, i8 42, i64 100, i1 false) + call void @large_p2(ptr %ptr1, ptr %ptr2) + %l = load i16, ptr %ptr1 + ret i16 %l +} + + +; Function Attrs: mustprogress nounwind uwtable +define i16 @large_p2_must_alias_caller() { +; CHECK-LABEL: @large_p2_must_alias_caller( +; CHECK-NEXT: [[PTR:%.*]] = alloca [300 x i8], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 100 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 42, i64 200, i1 false) +; CHECK-NEXT: call void @large_p2(ptr [[PTR]], ptr [[PTR]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[PTR]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %ptr = alloca [300 x i8] + call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 300, i1 false) + call void @large_p2(ptr %ptr, ptr %ptr) + %l = load i16, ptr %ptr + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @large_p2_may_or_partial_alias_caller1(ptr %base) { +; CHECK-LABEL: @large_p2_may_or_partial_alias_caller1( +; CHECK-NEXT: [[BASEPLUS:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 100 +; 
CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[BASE]], i8 42, i64 300, i1 false) +; CHECK-NEXT: call void @large_p2(ptr [[BASE]], ptr [[BASEPLUS]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[BASE]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + %baseplus = getelementptr i8, ptr %base, i64 100 + call void @llvm.memset.p0.i64(ptr %base, i8 42, i64 300, i1 false) + call void @large_p2(ptr %base, ptr %baseplus) + %l = load i16, ptr %base + ret i16 %l +} + +; Function Attrs: mustprogress nounwind uwtable +define i16 @large_p2_may_or_partial_alias_caller2(ptr %base1, ptr %base2) { +; CHECK-LABEL: @large_p2_may_or_partial_alias_caller2( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[BASE1:%.*]], i8 42, i64 300, i1 false) +; CHECK-NEXT: call void @large_p2(ptr [[BASE1]], ptr [[BASE2:%.*]]) +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[BASE1]], align 2 +; CHECK-NEXT: ret i16 [[L]] +; + call void @llvm.memset.p0.i64(ptr %base1, i8 42, i64 300, i1 false) + call void @large_p2(ptr %base1, ptr %base2) + %l = load i16, ptr %base1 + ret i16 %l +} +