Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DSE] Apply initializes attribute to DSE #107282

Merged
merged 15 commits into from
Oct 24, 2024
Merged

Conversation

haopliu
Copy link
Contributor

@haopliu haopliu commented Sep 4, 2024

Apply the initializes attribute to DSE and guard with a flag, "enable-dse-initializes-attr-improvement".

The attribute support has been landed in: #84803
The attribute inference will be landed after this PR: #97373

@llvmbot
Copy link
Member

llvmbot commented Sep 4, 2024

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Haopeng Liu (haopliu)

Changes

Apply the initializes attribute to DSE and guard with a flag, "enable-dse-initializes-attr-improvement".

The attribute support has been landed in: #84803
The attribute inference will be landed after this PR: #97373


Full diff: https://github.com/llvm/llvm-project/pull/107282.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+184-42)
  • (added) llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll (+159)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a37f295abbd31c..3ccb064adbf0df 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -52,6 +52,7 @@
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRangeList.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
@@ -164,6 +165,10 @@ static cl::opt<bool>
     OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
                       cl::desc("Allow DSE to optimize memory accesses."));
 
+static cl::opt<bool> EnableInitializesImprovement(
+    "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden,
+    cl::desc("Enable the initializes attr improvement in DSE"));
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
@@ -809,8 +814,10 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
 // A memory location wrapper that represents a MemoryLocation, `MemLoc`,
 // defined by `MemDef`.
 struct MemoryLocationWrapper {
-  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef)
-      : MemLoc(MemLoc), MemDef(MemDef) {
+  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
+                        bool DefByInitializesAttr)
+      : MemLoc(MemLoc), MemDef(MemDef),
+        DefByInitializesAttr(DefByInitializesAttr) {
     assert(MemLoc.Ptr && "MemLoc should be not null");
     UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
     DefInst = MemDef->getMemoryInst();
@@ -820,20 +827,121 @@ struct MemoryLocationWrapper {
   const Value *UnderlyingObject;
   MemoryDef *MemDef;
   Instruction *DefInst;
+  bool DefByInitializesAttr = false;
 };
 
 // A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
 // defined by this MemoryDef.
 struct MemoryDefWrapper {
-  MemoryDefWrapper(MemoryDef *MemDef, std::optional<MemoryLocation> MemLoc) {
+  MemoryDefWrapper(
+      MemoryDef *MemDef,
+      const SmallVectorImpl<std::pair<MemoryLocation, bool>> &MemLocations) {
     DefInst = MemDef->getMemoryInst();
-    if (MemLoc.has_value())
-      DefinedLocation = MemoryLocationWrapper(*MemLoc, MemDef);
+    for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
+      DefinedLocations.push_back(
+          MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
   }
   Instruction *DefInst;
-  std::optional<MemoryLocationWrapper> DefinedLocation = std::nullopt;
+  SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
+};
+
+bool HasInitializesAttr(Instruction *I) {
+  CallBase *CB = dyn_cast<CallBase>(I);
+  if (!CB)
+    return false;
+
+  for (size_t Idx = 0; Idx < CB->arg_size(); Idx++)
+    if (CB->paramHasAttr(Idx, Attribute::Initializes))
+      return true;
+  return false;
+}
+
+struct ArgumentInitInfo {
+  size_t Idx = -1;
+  ConstantRangeList Inits;
+  bool HasDeadOnUnwindAttr = false;
+  bool FuncHasNoUnwindAttr = false;
 };
 
+ConstantRangeList
+GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
+  if (Args.empty())
+    return {};
+
+  // To address unwind, the function should have nounwind attribute or the
+  // arguments have dead_on_unwind attribute. Otherwise, return empty.
+  for (const auto &Arg : Args) {
+    if (!Arg.FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
+      return {};
+    if (Arg.Inits.empty())
+      return {};
+  }
+
+  if (Args.size() == 1)
+    return Args[0].Inits;
+
+  ConstantRangeList MergedIntervals = Args[0].Inits;
+  for (size_t i = 1; i < Args.size(); i++)
+    MergedIntervals = MergedIntervals.intersectWith(Args[i].Inits);
+
+  return MergedIntervals;
+}
+
+// Return the locations wrote by the initializes attribute.
+// Note that this function considers:
+// 1. Unwind edge: apply "initializes" attribute only if the callee has
+//    "nounwind" attribute or the argument has "dead_on_unwind" attribute.
+// 2. Argument alias: for aliasing arguments, the "initializes" attribute is
+//    the merged range list of their "initializes" attributes.
+SmallVector<MemoryLocation, 1>
+GetInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
+  const CallBase *CB = dyn_cast<CallBase>(I);
+  if (!CB)
+    return {};
+
+  // Collect aliasing arguments and their initializes ranges.
+  bool HasNoUnwindAttr = CB->hasFnAttr(Attribute::NoUnwind);
+  SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
+  for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) {
+    ConstantRangeList Inits;
+    if (CB->paramHasAttr(Idx, Attribute::Initializes))
+      Inits = CB->getParamAttr(Idx, Attribute::Initializes)
+                  .getValueAsConstantRangeList();
+
+    bool HasDeadOnUnwindAttr = CB->paramHasAttr(Idx, Attribute::DeadOnUnwind);
+    ArgumentInitInfo InitInfo{Idx, Inits, HasDeadOnUnwindAttr, HasNoUnwindAttr};
+    Value *CurArg = CB->getArgOperand(Idx);
+    bool FoundAliasing = false;
+    for (auto &[Arg, AliasList] : Arguments) {
+      if (BatchAA.isMustAlias(Arg, CurArg)) {
+        FoundAliasing = true;
+        AliasList.push_back(InitInfo);
+      }
+    }
+    if (!FoundAliasing)
+      Arguments[CurArg] = {InitInfo};
+  }
+
+  SmallVector<MemoryLocation, 1> Locations;
+  for (const auto &[_, Args] : Arguments) {
+    auto MergedInitAttr = GetMergedInitAttr(Args);
+    if (MergedInitAttr.empty())
+      continue;
+
+    for (const auto &Arg : Args) {
+      for (const auto &Range : MergedInitAttr) {
+        int64_t Start = Range.getLower().getSExtValue();
+        int64_t End = Range.getUpper().getSExtValue();
+        if (Start == 0)
+          Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
+                                             LocationSize::precise(End - Start),
+                                             CB->getAAMetadata()));
+      }
+    }
+  }
+  return Locations;
+}
+
 struct DSEState {
   Function &F;
   AliasAnalysis &AA;
@@ -911,7 +1019,8 @@ struct DSEState {
 
         auto *MD = dyn_cast_or_null<MemoryDef>(MA);
         if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
-            (getLocForWrite(&I) || isMemTerminatorInst(&I)))
+            (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
+             HasInitializesAttr(&I)))
           MemDefs.push_back(MD);
       }
     }
@@ -1147,13 +1256,26 @@ struct DSEState {
     return MemoryLocation::getOrNone(I);
   }
 
-  std::optional<MemoryLocation> getLocForInst(Instruction *I) {
+  // Returns a list of <MemoryLocation, bool> pairs wrote by I.
+  // The bool means whether the write is from Initializes attr.
+  SmallVector<std::pair<MemoryLocation, bool>, 1>
+  getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
+    SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
     if (isMemTerminatorInst(I)) {
-      if (auto Loc = getLocForTerminator(I)) {
-        return Loc->first;
+      if (auto Loc = getLocForTerminator(I))
+        Locations.push_back(std::make_pair(Loc->first, false));
+      return Locations;
+    }
+
+    if (auto Loc = getLocForWrite(I))
+      Locations.push_back(std::make_pair(*Loc, false));
+
+    if (ConsiderInitializesAttr) {
+      for (auto &MemLoc : GetInitializesArgMemLoc(I, BatchAA)) {
+        Locations.push_back(std::make_pair(MemLoc, true));
       }
     }
-    return getLocForWrite(I);
+    return Locations;
   }
 
   /// Assuming this instruction has a dead analyzable write, can we delete
@@ -1365,7 +1487,8 @@ struct DSEState {
   getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                   const MemoryLocation &KillingLoc, const Value *KillingUndObj,
                   unsigned &ScanLimit, unsigned &WalkerStepLimit,
-                  bool IsMemTerm, unsigned &PartialLimit) {
+                  bool IsMemTerm, unsigned &PartialLimit,
+                  bool IsInitializesAttrMemLoc) {
     if (ScanLimit == 0 || WalkerStepLimit == 0) {
       LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
       return std::nullopt;
@@ -1602,7 +1725,17 @@ struct DSEState {
 
       // Uses which may read the original MemoryDef mean we cannot eliminate the
       // original MD. Stop walk.
-      if (isReadClobber(MaybeDeadLoc, UseInst)) {
+      // If KillingDef is a CallInst with "initializes" attribute, the reads in
+      // Callee would be dominated by initializations, so this should be safe.
+      bool IsKillingDefFromInitAttr = false;
+      if (IsInitializesAttrMemLoc) {
+        if (KillingI == UseInst &&
+            KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr)) {
+          IsKillingDefFromInitAttr = true;
+        }
+      }
+
+      if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
         LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
         return std::nullopt;
       }
@@ -2207,7 +2340,8 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
     std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
         KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
         KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
-        isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit);
+        isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
+        KillingLocWrapper.DefByInitializesAttr);
 
     if (!MaybeDeadAccess) {
       LLVM_DEBUG(dbgs() << "  finished walk\n");
@@ -2232,8 +2366,11 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
     }
     MemoryDefWrapper DeadDefWrapper(
         cast<MemoryDef>(DeadAccess),
-        getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst()));
-    MemoryLocationWrapper &DeadLocWrapper = *DeadDefWrapper.DefinedLocation;
+        getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
+                      /*ConsiderInitializesAttr=*/false));
+    assert(DeadDefWrapper.DefinedLocations.size() == 1);
+    MemoryLocationWrapper &DeadLocWrapper =
+        DeadDefWrapper.DefinedLocations.front();
     LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
     ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
     NumGetDomMemoryDefPassed++;
@@ -2311,37 +2448,41 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
 }
 
 bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
-  if (!KillingDefWrapper.DefinedLocation.has_value()) {
+  if (KillingDefWrapper.DefinedLocations.empty()) {
     LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
                       << *KillingDefWrapper.DefInst << "\n");
     return false;
   }
 
-  auto &KillingLocWrapper = *KillingDefWrapper.DefinedLocation;
-  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
-                    << *KillingLocWrapper.MemDef << " ("
-                    << *KillingLocWrapper.DefInst << ")\n");
-  auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
-
-  // Check if the store is a no-op.
-  if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
-                                        KillingLocWrapper.UnderlyingObject)) {
-    LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: "
-                      << *KillingLocWrapper.DefInst << '\n');
-    deleteDeadInstruction(KillingLocWrapper.DefInst);
-    NumRedundantStores++;
-    return true;
-  }
-  // Can we form a calloc from a memset/malloc pair?
-  if (!DeletedKillingLoc &&
-      tryFoldIntoCalloc(KillingLocWrapper.MemDef,
-                        KillingLocWrapper.UnderlyingObject)) {
-    LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
-                      << "  DEAD: " << *KillingLocWrapper.DefInst << '\n');
-    deleteDeadInstruction(KillingLocWrapper.DefInst);
-    return true;
+  bool MadeChange = false;
+  for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
+    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
+                      << *KillingLocWrapper.MemDef << " ("
+                      << *KillingLocWrapper.DefInst << ")\n");
+    auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
+
+    // Check if the store is a no-op.
+    if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
+                                          KillingLocWrapper.UnderlyingObject)) {
+      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: "
+                        << *KillingLocWrapper.DefInst << '\n');
+      deleteDeadInstruction(KillingLocWrapper.DefInst);
+      NumRedundantStores++;
+      MadeChange = true;
+      continue;
+    }
+    // Can we form a calloc from a memset/malloc pair?
+    if (!DeletedKillingLoc &&
+        tryFoldIntoCalloc(KillingLocWrapper.MemDef,
+                          KillingLocWrapper.UnderlyingObject)) {
+      LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
+                        << "  DEAD: " << *KillingLocWrapper.DefInst << '\n');
+      deleteDeadInstruction(KillingLocWrapper.DefInst);
+      MadeChange = true;
+      continue;
+    }
   }
-  return Changed;
+  return MadeChange;
 }
 
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
@@ -2357,7 +2498,8 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
       continue;
 
     MemoryDefWrapper KillingDefWrapper(
-        KillingDef, State.getLocForInst(KillingDef->getMemoryInst()));
+        KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
+                                        EnableInitializesImprovement));
     MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
   }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
new file mode 100644
index 00000000000000..c4ff69af9051bc
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=function-attrs,dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
+
+declare void @p1_write_only(ptr nocapture noundef writeonly initializes((0, 2)) dead_on_unwind)
+declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p2_same_range(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p2_no_init(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef dead_on_unwind)
+declare void @p2_no_dead_on_unwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)))
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_only_caller() {
+; CHECK-LABEL: @p1_write_only_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @p1_write_only(ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p1_write_only(ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_then_read_caller() {
+; CHECK-LABEL: @p1_write_then_read_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @p1_write_then_read(ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p1_write_then_read(ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_nonalias_caller() {
+; CHECK-LABEL: @p2_same_range_nonalias_caller(
+; CHECK-NEXT:    %ptr1 = alloca i16, align 2
+; CHECK-NEXT:    %ptr2 = alloca i16, align 2
+; CHECK-NEXT:    call void @p2_same_range(ptr %ptr1, ptr %ptr2)
+; CHECK-NEXT:    %l = load i16, ptr %ptr1
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr1 = alloca i16
+  %ptr2 = alloca i16
+  store i16 0, ptr %ptr1
+  store i16 0, ptr %ptr2
+  call void @p2_same_range(ptr %ptr1, ptr %ptr2)
+  %l = load i16, ptr %ptr1
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_alias_caller() {
+; CHECK-LABEL: @p2_same_range_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @p2_same_range(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p2_same_range(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_init_alias_caller() {
+; CHECK-LABEL: @p2_no_init_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr %ptr
+; CHECK-NEXT:    call void @p2_no_init(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p2_no_init(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_dead_on_unwind_alias_caller() {
+; CHECK-LABEL: @p2_no_dead_on_unwind_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr %ptr
+; CHECK-NEXT:    call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
+declare void @large_p1(ptr nocapture noundef initializes((0, 200))) nounwind
+declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocapture noundef initializes((0, 100))) nounwind
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p1_caller() {
+; CHECK-LABEL: @large_p1_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @large_p1(ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 100, i1 false)
+  call void @large_p1(ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_nonalias_caller() {
+; CHECK-LABEL: @large_p2_nonalias_caller(
+; CHECK-NEXT:    %ptr1 = alloca i16, align 2
+; CHECK-NEXT:    %ptr2 = alloca i16, align 2
+; CHECK-NEXT:    call void @large_p2(ptr %ptr1, ptr %ptr2)
+; CHECK-NEXT:    %l = load i16, ptr %ptr1
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr1 = alloca i16
+  %ptr2 = alloca i16
+  call void @llvm.memset.p0.i64(ptr %ptr1, i8 42, i64 200, i1 false)
+  call void @llvm.memset.p0.i64(ptr %ptr2, i8 42, i64 100, i1 false)
+  call void @large_p2(ptr %ptr1, ptr %ptr2)
+  %l = load i16, ptr %ptr1
+  ret i16 %l
+}
+
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_alias_caller() {
+; CHECK-LABEL: @large_p2_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    %1 = getelementptr inbounds i8, ptr %ptr, i64 100
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 %1, i8 42, i64 200, i1 false)
+; CHECK-NEXT:    call void @large_p2(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 300, i1 false)
+  call void @large_p2(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+

@@ -0,0 +1,159 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aa-pipeline=basic-aa -passes=function-attrs,dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is aa-pipeline necessary?

also this shouldn't be running function-attrs

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, you are right. Removed the aa-pipeline and function-attrs.

/*ConsiderInitializesAttr=*/false));
assert(DeadDefWrapper.DefinedLocations.size() == 1);
MemoryLocationWrapper &DeadLocWrapper =
DeadDefWrapper.DefinedLocations.front();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not 100% sure what's going on here, but it seems weird that we have two modes for MemoryDefWrapper, a single MemoryLocation version here and a multiple MemoryLocation below. is there any way to make this a little less hacked together? why does this have to be a single MemoryLocation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot apply the initializes attribute to DeadAccess/DeadDef since it would consider a call instruction as dead store and remove it incorrectly. Added a comment to explain it. Any suggestions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

after staring at this a bit, I believe this preexisting code is conflating two things: it's assuming that if there is a memory location that DeadDefWrapper writes to that overlaps with KillingLocWrapper, it must have no other side effects and be deletable. this happens to be true for stores and libcalls like strcpy that are handled here, but is not necessarily true in general.

I think ideally we change isRemovable to be more accurate about arbitrary function calls, and check that here, but I'm ok with a TODO saying something like `TODO: this conflates the existence of a MemoryLocation with being able to delete the instruction. fix isRemovable() to consider calls with side effects that cannot be removed, e.g. calls with the initializes attribute, and remove getLocForInst(ConsiderInitializesAttr = false) workaround

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this still needs a comment

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ooh, missed this comment. Added a TODO about isRemovable(). Thanks!

return Args[0].Inits;

ConstantRangeList MergedIntervals = Args[0].Inits;
for (size_t i = 1; i < Args.size(); i++)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stylish nit:
https://llvm.org/docs/CodingStandards.html#don-t-evaluate-end-every-time-through-a-loop
and capital variable names:
for (size_t I = 0, Count = Args.size(); I < Count; ++I)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this info! Will keep these style rules in mind.

if (!CB)
return false;

for (size_t Idx = 0; Idx < CB->arg_size(); Idx++)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here and ++Idx

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Thanks!

// Collect aliasing arguments and their initializes ranges.
bool HasNoUnwindAttr = CB->hasFnAttr(Attribute::NoUnwind);
SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
return false;

for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx)
if (CB->paramHasAttr(Idx, Attribute::Initializes))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an AttributeList::.hasAttrSomewhere API to do this check efficiently.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct me if I'm wrong. Different from CallBase::paramHasAttr, AttributeList::.hasAttrSomewhere API does not look into the called function's parameter list so we cannot apply this API here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you need to call it for both the call and function attribute list. Or add a new CallBase API that does this for you.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CallBase::getArgOperandWithAttribute is exactly what we need (call AttributeList::.hasAttrSomewhere with both the instruction attr and the called function attr). Used this API :-D

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
Value *CurArg = CB->getArgOperand(Idx);
bool FoundAliasing = false;
for (auto &[Arg, AliasList] : Arguments) {
if (BatchAA.isMustAlias(Arg, CurArg)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This handle MustAlias arguments, but what about arguments that MayAlias or PartialAlias?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conservatively handle May-/Partial-Alias same as MustAlias.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just handling them the same as MustAlias isn't correct. If it's PartialAlias then there is an offset between the arguments, so initializes refers to different offsets. And if it's MayAlias, then there may be an unknown offset, or the arguments may be unrelated entirely.

For the PartialAlias/MayAlias cases we should discard the initializes information entirely.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, thanks for the reminder. Make sense! Updated to insert an empty range for May-/Partial-Alias. This empty range would discard the entire initializes info later while intersecting the ranges among all aliasing args.

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
}

if (auto Loc = getLocForWrite(I))
Locations.push_back(std::make_pair(*Loc, false));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we return here or does this need to fall through?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Changed it to early return.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted the latest change. We need to fall through. For a call instruction, getLocForWrite may return a memory-location with imprecise size. Then, fall through to check the initializes attr.

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp Outdated Show resolved Hide resolved
bool IsKillingDefFromInitAttr = false;
if (IsInitializesAttrMemLoc) {
if (KillingI == UseInst &&
KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this inner if is untested (test pass if I replace the condition with true).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch! Added a new unit test, p1_write_then_read_caller_with_clobber, to test this inner condition.

@nikic
Copy link
Contributor

nikic commented Sep 6, 2024

What does the compile-time impact for this look like?

@haopliu
Copy link
Contributor Author

haopliu commented Sep 10, 2024

What does the compile-time impact for this look like?

I think this PR (enable the flag) has a negligible compile-time impact. The diff between this PR (enable the flag) and adding the attribute inference is 0.

https://llvm-compile-time-tracker.com/compare.php?from=1d3f46b01fd95b1be7806ffb8968e8108fb51b5b&to=cde86c7084026e801edcacc85784d2678d05654e&stat=instructions%3Au

We guard the change with a flag to confirm everything is expected after landing all, then enable this flag.

for (auto &[Arg, AliasList] : Arguments) {
if (BatchAA.isNoAlias(Arg, CurArg)) {
continue;
} else if (BatchAA.isMustAlias(Arg, CurArg)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should call alias() only once and then check the result.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Correct me if I'm wrong). Here checks two Value* aliasing: we need to convert them to two MemoryLocation, then call alias(). However, it seems that isMustAlias() and isNoAlias() convert in different ways: MemoryLocation(V, LocationSize::precise(1)) VS. MemoryLocation::getBeforeOrAfter(V). Then we have to call alias() twice?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For MustAlias the size does not matter. Using getBeforeOrAfter() is always a safe option.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Updated to call BatchAA.alias() using getBeforeOrAfter() once, then check AAR. Thanks!

Copy link
Contributor

@jvoung jvoung left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! LG to me, but defer to other reviewers for approval.

I think aeubanks had one unresolved comment about a TODO comment?

return {};

// To address unwind, the function should have nounwind attribute or the
// arguments have dead_on_unwind attribute. Otherwise, return empty.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can you update the comment "the arguments have dead_on_unwind attribute"? I think now it is "dead or invisible on unwind"? Similar on line 862, and maybe the field "IsDeadOnUnwind" could be "IsDeadOrInvisibleOnUnwind"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

Copy link
Contributor

@aeubanks aeubanks left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lgtm with remaining comments addressed

@haopliu
Copy link
Contributor Author

haopliu commented Oct 18, 2024

Thank you all!

I think aeubanks had one unresolved comment about a TODO comment?

Thanks for the reminder! Done.

@aeubanks
Copy link
Contributor

@nikic any more comments?

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't re-review in detail, but looks ok from a quick look.

@@ -1147,13 +1196,26 @@ struct DSEState {
return MemoryLocation::getOrNone(I);
}

std::optional<MemoryLocation> getLocForInst(Instruction *I) {
// Returns a list of <MemoryLocation, bool> pairs wrote by I.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Returns a list of <MemoryLocation, bool> pairs wrote by I.
// Returns a list of <MemoryLocation, bool> pairs written by I.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

// which means no unwind edges from this call in the current function.
bool IsDeadOrInvisibleOnUnwind =
CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
(isInvisibleToCallerOnUnwind(CurArg) && isa<CallInst>(CB));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
(isInvisibleToCallerOnUnwind(CurArg) && isa<CallInst>(CB));
(isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));

Cheaper check first.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

Comment on lines 850 to 851
return CB != nullptr &&
CB->getArgOperandWithAttribute(Attribute::Initializes) != nullptr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe more readable?

Suggested change
return CB != nullptr &&
CB->getArgOperandWithAttribute(Attribute::Initializes) != nullptr;
return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

@@ -1602,7 +1665,16 @@ struct DSEState {

// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
if (isReadClobber(MaybeDeadLoc, UseInst)) {
// If KillingDef is a CallInst with "initializes" attribute, the reads in
// Callee would be dominated by initializations, so this should be safe.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Callee would be dominated by initializations, so this should be safe.
// the callee would be dominated by initializations, so this should be safe.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

// Note that this function considers:
// 1. Unwind edge: use "initializes" attribute only if the callee has
// "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
// or the argument is invisble to caller on unwind. That is, we don't
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// or the argument is invisble to caller on unwind. That is, we don't
// or the argument is invisible to caller on unwind. That is, we don't

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

Value *CurArg = CB->getArgOperand(Idx);
// We don't perform incorrect DSE on unwind edges in the current function,
// and use the "initializes" attribute to kill dead stores if:
// - The call does not throw exceptions, "CB->doesNotThrow()".
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is nounwind checked in getIntersectedInitRangeList, no need to check it in within IsDeadOrInvisibleOnUnwind as the others?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IsDeadOrInvisibleOnUnwind is per argument nounwind.
getIntersectedInitRangeList considers IsDeadOrInvisibleOnUnwind per argument and CB->doesNotThrow() together.

Comment on lines 2285 to 2286
// - Or the argument is invisible to caller on unwind, and CB isa<CallInst>
// which means no unwind edges from this call in the current function.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe nit: should be implicit that it's not an Invoke if there are no unwind edges?

Suggested change
// - Or the argument is invisible to caller on unwind, and CB isa<CallInst>
// which means no unwind edges from this call in the current function.
// - Or the argument is invisible to the caller on unwind, and it's a pure function call, i.e., there are no
// unwind edges from this call in the current function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't heard the term "pure function call" refer to a call that doesn't unwind, I prefer something closer to the current version.

Or the argument is invisible to caller on unwind, and there are no unwind edges from this call in the current function (e.g. `CallInst`).

There are probably (?) more cases than just CallInst where we do this optimization so I use e.g. instead of i.e..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you both, done!

@haopliu
Copy link
Contributor Author

haopliu commented Oct 24, 2024

Thank you all! I'm going to merge this PR.

@haopliu haopliu merged commit 089237c into llvm:main Oct 24, 2024
5 of 8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Oct 24, 2024

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-debian running on gribozavr4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/7648

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Analysis/BasicAA/modref.ll' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
RUN: at line 2: /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Analysis/BasicAA/modref.ll -aa-pipeline=basic-aa -passes=gvn,dse -S | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Analysis/BasicAA/modref.ll
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt -aa-pipeline=basic-aa -passes=gvn,dse -S
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Analysis/BasicAA/modref.ll
LLVM ERROR: Function @test2a changed by DSEPass without invalidating analyses
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt -aa-pipeline=basic-aa -passes=gvn,dse -S
1.	Running pass "function(gvn<>,dse)" on module "<stdin>"
2.	Running pass "dse" on function "test2a"
 #0 0x00000000045d7897 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x45d7897)
 #1 0x00000000045d534e llvm::sys::RunSignalHandlers() (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x45d534e)
 #2 0x00000000045d7f4f SignalHandler(int) Signals.cpp:0:0
 #3 0x00007f2812b26140 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x13140)
 #4 0x00007f281263ad51 raise (/lib/x86_64-linux-gnu/libc.so.6+0x38d51)
 #5 0x00007f2812624537 abort (/lib/x86_64-linux-gnu/libc.so.6+0x22537)
 #6 0x00000000045392aa llvm::report_fatal_error(llvm::Twine const&, bool) (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x45392aa)
 #7 0x000000000285239a void llvm::detail::UniqueFunctionBase<void, llvm::StringRef, llvm::Any, llvm::PreservedAnalyses const&>::CallImpl<llvm::PreservedCFGCheckerInstrumentation::registerCallbacks(llvm::PassInstrumentationCallbacks&, llvm::AnalysisManager<llvm::Module>&)::$_18>(void*, llvm::StringRef, llvm::Any&, llvm::PreservedAnalyses const&) StandardInstrumentations.cpp:0:0
 #8 0x0000000004407b1e llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x4407b1e)
 #9 0x0000000000d0f84d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) crtstuff.c:0:0
#10 0x000000000440bd50 llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x440bd50)
#11 0x0000000000d0f61d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) crtstuff.c:0:0
#12 0x0000000004406917 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x4406917)
#13 0x000000000085dd00 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x85dd00)
#14 0x00000000008533e5 optMain (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x8533e5)
#15 0x00007f2812625d7a __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x23d7a)
#16 0x000000000084f74a _start (/b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt+0x84f74a)
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Analysis/BasicAA/modref.ll

--

********************


@llvm-ci
Copy link
Collaborator

llvm-ci commented Oct 24, 2024

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-ubuntu running on as-builder-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/187/builds/2087

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Analysis/BasicAA/modref.ll' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
RUN: at line 2: /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/opt < /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/Analysis/BasicAA/modref.ll -aa-pipeline=basic-aa -passes=gvn,dse -S | /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/Analysis/BasicAA/modref.ll
+ /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/Analysis/BasicAA/modref.ll
+ /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/opt -aa-pipeline=basic-aa -passes=gvn,dse -S
LLVM ERROR: Function @test2a changed by DSEPass without invalidating analyses
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/opt -aa-pipeline=basic-aa -passes=gvn,dse -S
1.	Running pass "function(gvn<>,dse)" on module "<stdin>"
2.	Running pass "dse" on function "test2a"
 #0 0x000056266bf05268 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/lib/Support/Unix/Signals.inc:723:22
 #1 0x000056266bf05689 PrintStackTraceSignalHandler(void*) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/lib/Support/Unix/Signals.inc:798:1
 #2 0x000056266bf02ad9 llvm::sys::RunSignalHandlers() /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/lib/Support/Signals.cpp:105:20
 #3 0x000056266bf04b00 SignalHandler(int) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/lib/Support/Unix/Signals.inc:413:1
 #4 0x00007f57e2a9b520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #5 0x00007f57e2aef9fc pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x969fc)
 #6 0x00007f57e2a9b476 gsignal (/lib/x86_64-linux-gnu/libc.so.6+0x42476)
 #7 0x00007f57e2a817f3 abort (/lib/x86_64-linux-gnu/libc.so.6+0x287f3)
 #8 0x000056266be15743 llvm::report_fatal_error(llvm::Twine const&, bool) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/lib/Support/ErrorHandling.cpp:126:9
 #9 0x0000562668ec9cf1 llvm::PreservedCFGCheckerInstrumentation::registerCallbacks(llvm::PassInstrumentationCallbacks&, llvm::AnalysisManager<llvm::Module>&)::'lambda1'(llvm::StringRef, llvm::Any, llvm::PreservedAnalyses const&)::operator()(llvm::StringRef, llvm::Any, llvm::PreservedAnalyses const&) const /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp:1444:63
#10 0x0000562668edee37 void llvm::detail::UniqueFunctionBase<void, llvm::StringRef, llvm::Any, llvm::PreservedAnalyses const&>::CallImpl<llvm::PreservedCFGCheckerInstrumentation::registerCallbacks(llvm::PassInstrumentationCallbacks&, llvm::AnalysisManager<llvm::Module>&)::'lambda1'(llvm::StringRef, llvm::Any, llvm::PreservedAnalyses const&)>(void*, llvm::StringRef, llvm::Any&, llvm::PreservedAnalyses const&) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h:222:16
#11 0x000056266929a85d llvm::unique_function<void (llvm::StringRef, llvm::Any, llvm::PreservedAnalyses const&)>::operator()(llvm::StringRef, llvm::Any, llvm::PreservedAnalyses const&) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h:387:62
#12 0x000056266aceb38c void llvm::PassInstrumentation::runAfterPass<llvm::Function, llvm::detail::PassConcept<llvm::Function, llvm::AnalysisManager<llvm::Function>>>(llvm::detail::PassConcept<llvm::Function, llvm::AnalysisManager<llvm::Function>> const&, llvm::Function const&, llvm::PreservedAnalyses const&) const /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/include/llvm/IR/PassInstrumentation.h:273:30
#13 0x000056266bc2aafc llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/include/llvm/IR/PassManagerImpl.h:93:27
#14 0x00005626664fc67d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:91:3
#15 0x000056266bc29abe llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/lib/IR/PassManager.cpp:129:23
#16 0x00005626664fc5ad llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:91:3
#17 0x000056266bc2a6df llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/include/llvm/IR/PassManagerImpl.h:85:18
#18 0x0000562665bb947e llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/tools/opt/NewPMDriver.cpp:541:10
#19 0x0000562665b89de3 optMain /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/tools/opt/optdriver.cpp:739:27
#20 0x0000562665b875c1 main /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/tools/opt/opt.cpp:25:64
#21 0x00007f57e2a82d90 (/lib/x86_64-linux-gnu/libc.so.6+0x29d90)
#22 0x00007f57e2a82e40 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x29e40)
#23 0x0000562665b874a5 _start (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/opt+0xcf34a5)
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/Analysis/BasicAA/modref.ll

--

********************


@mikaelholmen
Copy link
Collaborator

@haopliu : As the build bots say, many lit tests fail when I've compiled with EXPENSIVE_CHECKS on.
Tests fail like
LLVM ERROR: Function @test1 changed by DSEPass without invalidating analyses

alexey-bataev added a commit that referenced this pull request Oct 24, 2024
@momchil-velikov
Copy link
Collaborator

The revert went to a branch instead of main ?

aeubanks added a commit that referenced this pull request Oct 24, 2024
Reverts #107282

Seems to be causing invalid analysis caching as mentioned in
#107282 (comment).
@aeubanks
Copy link
Contributor

reverted in #113589

LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
<< *KillingLocWrapper.MemDef << " ("
<< *KillingLocWrapper.DefInst << ")\n");
auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like we're not taking this Changed into account

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, nice catch! Is there a way to launch an offline buildbot run to validate? https://lab.llvm.org/buildbot/

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you repro locally with the same cmake flags as the bot?
E.g., -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON in this step https://lab.llvm.org/buildbot/#/builders/16/builds/7648/steps/4/logs/stdio ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, I repro the failure locally and confirmed that MadeChange |= Changed; works.
Will retry this PR!

haopliu added a commit that referenced this pull request Oct 25, 2024
retry #107282

Fixed with `MadeChange |= Changed;` and confirmed it works.

```
cmake -DLLVM_CCACHE_BUILD=ON -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON -DLLVM_ENABLE_WERROR=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS=-U_GLIBCXX_DEBUG '-DLLVM_LIT_ARGS=-v -vv -j96' '-DLLVM_ENABLE_PROJECTS=llvm;lld' -DLLVM_ENABLE_ASSERTIONS=ON -GNinja ../llvm

ninja check-llvm
```
@haopliu
Copy link
Contributor Author

haopliu commented Oct 25, 2024

Re-merged this PR in #113630 with a fix.

@frobtech frobtech mentioned this pull request Oct 25, 2024
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
Apply the initializes attribute to DSE and guard with a flag,
"enable-dse-initializes-attr-improvement".

The attribute support has been landed in:
llvm#84803
The attribute inference will be landed after this PR:
llvm#97373
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
Reverts llvm#107282

Seems to be causing invalid analysis caching as mentioned in
llvm#107282 (comment).
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
retry llvm#107282

Fixed with `MadeChange |= Changed;` and confirmed it works.

```
cmake -DLLVM_CCACHE_BUILD=ON -DLLVM_ENABLE_EXPENSIVE_CHECKS=ON -DLLVM_ENABLE_WERROR=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS=-U_GLIBCXX_DEBUG '-DLLVM_LIT_ARGS=-v -vv -j96' '-DLLVM_ENABLE_PROJECTS=llvm;lld' -DLLVM_ENABLE_ASSERTIONS=ON -GNinja ../llvm

ninja check-llvm
```
haopliu added a commit that referenced this pull request Dec 10, 2024
Tested with an internal search backend loadtest.

With `-ftrivial-auto-var-init`, this work has a 0.2%-0.3% total QPS
improvement.

Note that, the metric is total QPS instead of cpu-time, even 1%
improvement means a lot.

- Add the "initializes" attr:
#97373
- Apply the attr to DSE:
#107282
broxigarchen pushed a commit to broxigarchen/llvm-project that referenced this pull request Dec 10, 2024
Tested with an internal search backend loadtest.

With `-ftrivial-auto-var-init`, this work has a 0.2%-0.3% total QPS
improvement.

Note that, the metric is total QPS instead of cpu-time, even 1%
improvement means a lot.

- Add the "initializes" attr:
llvm#97373
- Apply the attr to DSE:
llvm#107282
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

10 participants