From f80c248a2f26ae80c931eb6dfdec0aea533de537 Mon Sep 17 00:00:00 2001 From: Arseniy Zaostrovnykh Date: Mon, 26 Aug 2024 13:20:33 +0200 Subject: [PATCH 01/65] [analyzer][NFC] Add tests for and refactor StackAddrEscapeChecker 1/3 (#105652) These tests and refactoring are preparatory for the upcoming changes: detection of the indirect leak via global variables and output parameters. CPP-4734 ------- This is the first of three commits constituting https://github.com/llvm/llvm-project/pull/105648 --- .../Checkers/StackAddrEscapeChecker.cpp | 71 ++- clang/test/Analysis/stack-addr-ps.c | 31 + clang/test/Analysis/stack-addr-ps.cpp | 596 ++++++++++++++++++ 3 files changed, 665 insertions(+), 33 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp index ea09c43cc5ce90..2bd4ca4528de8b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp @@ -288,12 +288,37 @@ void StackAddrEscapeChecker::checkPreStmt(const ReturnStmt *RS, EmitStackError(C, R, RetE); } +std::optional printReferrer(const MemRegion *Referrer) { + assert(Referrer); + const StringRef ReferrerMemorySpace = [](const MemSpaceRegion *Space) { + if (isa(Space)) + return "static"; + if (isa(Space)) + return "global"; + assert(isa(Space)); + return "stack"; + }(Referrer->getMemorySpace()); + + // We should really only have VarRegions here. + // Anything else is really surprising, and we should get notified if such + // ever happens. + const auto *ReferrerVar = dyn_cast(Referrer); + if (!ReferrerVar) { + assert(false && "We should have a VarRegion here"); + return std::nullopt; // Defensively skip this one. + } + const std::string ReferrerVarName = + ReferrerVar->getDecl()->getDeclName().getAsString(); + + return (ReferrerMemorySpace + " variable '" + ReferrerVarName + "'").str(); +} + void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, CheckerContext &Ctx) const { if (!ChecksEnabled[CK_StackAddrEscapeChecker]) return; - ProgramStateRef State = Ctx.getState(); + ExplodedNode *Node = Ctx.getPredecessor(); // Iterate over all bindings to global variables and see if it contains // a memory region in the stack space. @@ -315,15 +340,10 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, if (!ReferrerMemSpace || !ReferredMemSpace) return false; - const auto *ReferrerFrame = ReferrerMemSpace->getStackFrame(); - const auto *ReferredFrame = ReferredMemSpace->getStackFrame(); - - if (ReferrerMemSpace && ReferredMemSpace) { - if (ReferredFrame == PoppedFrame && - ReferrerFrame->isParentOf(PoppedFrame)) { - V.emplace_back(Referrer, Referred); - return true; - } + if (ReferredMemSpace->getStackFrame() == PoppedFrame && + ReferrerMemSpace->getStackFrame()->isParentOf(PoppedFrame)) { + V.emplace_back(Referrer, Referred); + return true; } return false; } @@ -352,6 +372,7 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, }; CallBack Cb(Ctx); + ProgramStateRef State = Node->getState(); State->getStateManager().getStoreManager().iterBindings(State->getStore(), Cb); @@ -359,7 +380,7 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, return; // Generate an error node. 
- ExplodedNode *N = Ctx.generateNonFatalErrorNode(State); + ExplodedNode *N = Ctx.generateNonFatalErrorNode(State, Node); if (!N) return; @@ -374,13 +395,13 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, // Generate a report for this bug. const StringRef CommonSuffix = - "upon returning to the caller. This will be a dangling reference"; + " upon returning to the caller. This will be a dangling reference"; SmallString<128> Buf; llvm::raw_svector_ostream Out(Buf); const SourceRange Range = genName(Out, Referred, Ctx.getASTContext()); if (isa(Referrer)) { - Out << " is still referred to by a temporary object on the stack " + Out << " is still referred to by a temporary object on the stack" << CommonSuffix; auto Report = std::make_unique(*BT_stackleak, Out.str(), N); @@ -390,28 +411,12 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, return; } - const StringRef ReferrerMemorySpace = [](const MemSpaceRegion *Space) { - if (isa(Space)) - return "static"; - if (isa(Space)) - return "global"; - assert(isa(Space)); - return "stack"; - }(Referrer->getMemorySpace()); - - // We should really only have VarRegions here. - // Anything else is really surprising, and we should get notified if such - // ever happens. - const auto *ReferrerVar = dyn_cast(Referrer); - if (!ReferrerVar) { - assert(false && "We should have a VarRegion here"); - continue; // Defensively skip this one. + auto ReferrerVariable = printReferrer(Referrer); + if (!ReferrerVariable) { + continue; } - const std::string ReferrerVarName = - ReferrerVar->getDecl()->getDeclName().getAsString(); - Out << " is still referred to by the " << ReferrerMemorySpace - << " variable '" << ReferrerVarName << "' " << CommonSuffix; + Out << " is still referred to by the " << *ReferrerVariable << CommonSuffix; auto Report = std::make_unique(*BT_stackleak, Out.str(), N); if (Range.isValid()) diff --git a/clang/test/Analysis/stack-addr-ps.c b/clang/test/Analysis/stack-addr-ps.c index e69ab4189b524f..2e14b7820be136 100644 --- a/clang/test/Analysis/stack-addr-ps.c +++ b/clang/test/Analysis/stack-addr-ps.c @@ -95,3 +95,34 @@ void callTestRegister(void) { char buf[20]; testRegister(buf); // no-warning } + +void top_level_leaking(int **out) { + int local = 42; + *out = &local; // no-warning FIXME +} + +void callee_leaking_via_param(int **out) { + int local = 1; + *out = &local; + // expected-warning@-1{{Address of stack memory associated with local variable 'local' is still referred to by the stack variable 'ptr'}} +} + +void caller_for_leaking_callee() { + int *ptr = 0; + callee_leaking_via_param(&ptr); +} + +void callee_nested_leaking(int **out) { + int local = 1; + *out = &local; + // expected-warning@-1{{Address of stack memory associated with local variable 'local' is still referred to by the stack variable 'ptr'}} +} + +void caller_mid_for_nested_leaking(int **mid) { + callee_nested_leaking(mid); +} + +void caller_for_nested_leaking() { + int *ptr = 0; + caller_mid_for_nested_leaking(&ptr); +} diff --git a/clang/test/Analysis/stack-addr-ps.cpp b/clang/test/Analysis/stack-addr-ps.cpp index bd856be2b8d690..68ccc322bf2ef2 100644 --- a/clang/test/Analysis/stack-addr-ps.cpp +++ b/clang/test/Analysis/stack-addr-ps.cpp @@ -161,3 +161,599 @@ C make1() { void test_copy_elision() { C c1 = make1(); } + +namespace leaking_via_direct_pointer { +void* returned_direct_pointer_top() { + int local = 42; + int* p = &local; + return p; // expected-warning{{associated with local variable 'local' returned}} +} + +int* 
returned_direct_pointer_callee() { + int local = 42; + int* p = &local; + return p; // expected-warning{{associated with local variable 'local' returned}} +} + +void returned_direct_pointer_caller() { + int* loc_ptr = nullptr; + loc_ptr = returned_direct_pointer_callee(); + (void)loc_ptr; +} + +void* global_ptr; + +void global_direct_pointer() { + int local = 42; + global_ptr = &local; +} // expected-warning{{local variable 'local' is still referred to by the global variable 'global_ptr'}} + +void static_direct_pointer_top() { + int local = 42; + static int* p = &local; + (void)p; +} // expected-warning{{local variable 'local' is still referred to by the static variable 'p'}} + +void static_direct_pointer_callee() { + int local = 42; + static int* p = &local; + (void)p; // expected-warning{{local variable 'local' is still referred to by the static variable 'p'}} +} + +void static_direct_pointer_caller() { + static_direct_pointer_callee(); +} + +void lambda_to_global_direct_pointer() { + auto lambda = [&] { + int local = 42; + global_ptr = &local; // expected-warning{{local variable 'local' is still referred to by the global variable 'global_ptr'}} + }; + lambda(); +} + +void lambda_to_context_direct_pointer() { + int *p = nullptr; + auto lambda = [&] { + int local = 42; + p = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'p'}} + }; + lambda(); + (void)p; +} + +template +class MyFunction { + Callable* fptr; + public: + MyFunction(Callable* callable) :fptr(callable) {} +}; + +void* lambda_to_context_direct_pointer_uncalled() { + int *p = nullptr; + auto lambda = [&] { + int local = 42; + p = &local; // no-warning: analyzed only as top-level, ignored explicitly by the checker + }; + return new MyFunction(&lambda); +} + +void lambda_to_context_direct_pointer_lifetime_extended() { + int *p = nullptr; + auto lambda = [&] { + int&& local = 42; + p = &local; // expected-warning{{'int' lifetime extended by local variable 'local' is still referred to by the stack variable 'p'}} + }; + lambda(); + (void)p; +} + +template +void lambda_param_capture_direct_pointer_callee(Callback& callee) { + int local = 42; + callee(local); // expected-warning{{'local' is still referred to by the stack variable 'p'}} +} + +void lambda_param_capture_direct_pointer_caller() { + int* p = nullptr; + auto capt = [&p](int& param) { + p = ¶m; + }; + lambda_param_capture_direct_pointer_callee(capt); +} +} // namespace leaking_via_direct_pointer + +namespace leaking_via_ptr_to_ptr { +void** returned_ptr_to_ptr_top() { + int local = 42; + int* p = &local; + void** pp = (void**)&p; + return pp; // expected-warning{{associated with local variable 'p' returned}} +} + +void** global_pp; + +void global_ptr_local_to_ptr() { + int local = 42; + int* p = &local; + global_pp = (void**)&p; +} // expected-warning{{local variable 'p' is still referred to by the global variable 'global_pp'}} + +void global_ptr_to_ptr() { + int local = 42; + *global_pp = &local; // no-warning FIXME +} + +void *** global_ppp; + +void global_ptr_to_ptr_to_ptr() { + int local = 42; + **global_ppp = &local; // no-warning FIXME +} + +void** get_some_pp(); + +void static_ptr_to_ptr() { + int local = 42; + static void** pp = get_some_pp(); + *pp = &local; +} // no-warning False Negative, requires relating multiple bindings to cross the invented pointer. 
+ +void param_ptr_to_ptr_top(void** pp) { + int local = 42; + *pp = &local; // no-warning FIXME +} + +void param_ptr_to_ptr_callee(void** pp) { + int local = 42; + *pp = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'p'}} +} + +void param_ptr_to_ptr_caller() { + void* p = nullptr; + param_ptr_to_ptr_callee((void**)&p); +} + +void param_ptr_to_ptr_to_ptr_top(void*** ppp) { + int local = 42; + **ppp = &local; // no-warning FIXME +} + +void param_ptr_to_ptr_to_ptr_callee(void*** ppp) { + int local = 42; + **ppp = &local; // no-warning FIXME +} + +void param_ptr_to_ptr_to_ptr_caller(void** pp) { + param_ptr_to_ptr_to_ptr_callee(&pp); +} + +void lambda_to_context_ptr_to_ptr(int **pp) { + auto lambda = [&] { + int local = 42; + *pp = &local; // no-warning FIXME + }; + lambda(); + (void)*pp; +} + +void param_ptr_to_ptr_fptr(int **pp) { + int local = 42; + *pp = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'p'}} +} + +void param_ptr_to_ptr_fptr_caller(void (*fptr)(int**)) { + int* p = nullptr; + fptr(&p); +} + +void param_ptr_to_ptr_caller_caller() { + void (*fptr)(int**) = param_ptr_to_ptr_fptr; + param_ptr_to_ptr_fptr_caller(fptr); +} +} // namespace leaking_via_ptr_to_ptr + +namespace leaking_via_ref_to_ptr { +void** make_ptr_to_ptr(); +void*& global_rtp = *make_ptr_to_ptr(); + +void global_ref_to_ptr() { + int local = 42; + int* p = &local; + global_rtp = p; // no-warning FIXME +} + +void static_ref_to_ptr() { + int local = 42; + static void*& p = *make_ptr_to_ptr(); + p = &local; + (void)p; +} // no-warning False Negative, requires relating multiple bindings to cross the invented pointer. + +void param_ref_to_ptr_top(void*& rp) { + int local = 42; + int* p = &local; + rp = p; // no-warning FIXME +} + +void param_ref_to_ptr_callee(void*& rp) { + int local = 42; + int* p = &local; + rp = p; // expected-warning{{local variable 'local' is still referred to by the stack variable 'p'}} +} + +void param_ref_to_ptr_caller() { + void* p = nullptr; + param_ref_to_ptr_callee(p); +} +} // namespace leaking_via_ref_to_ptr + +namespace leaking_via_arr_of_ptr_static_idx { +void** returned_arr_of_ptr_top() { + int local = 42; + int* p = &local; + void** arr = new void*[2]; + arr[1] = p; + return arr; +} // no-warning False Negative + +void** returned_arr_of_ptr_callee() { + int local = 42; + int* p = &local; + void** arr = new void*[2]; + arr[1] = p; + return arr; +} // no-warning False Negative + +void returned_arr_of_ptr_caller() { + void** arr = returned_arr_of_ptr_callee(); + (void)arr[1]; +} + +void* global_aop[2]; + +void global_arr_of_ptr() { + int local = 42; + int* p = &local; + global_aop[1] = p; +} // expected-warning{{local variable 'local' is still referred to by the global variable 'global_aop'}} + +void static_arr_of_ptr() { + int local = 42; + static void* arr[2]; + arr[1] = &local; + (void)arr[1]; +} // expected-warning{{local variable 'local' is still referred to by the static variable 'arr'}} + +void param_arr_of_ptr_top(void* arr[2]) { + int local = 42; + int* p = &local; + arr[1] = p; // no-warning FIXME +} + +void param_arr_of_ptr_callee(void* arr[2]) { + int local = 42; + int* p = &local; + arr[1] = p; // expected-warning{{local variable 'local' is still referred to by the stack variable 'arrStack'}} +} + +void param_arr_of_ptr_caller() { + void* arrStack[2]; + param_arr_of_ptr_callee(arrStack); + (void)arrStack[1]; +} +} // namespace leaking_via_arr_of_ptr_static_idx + +namespace 
leaking_via_arr_of_ptr_dynamic_idx { +void** returned_arr_of_ptr_top(int idx) { + int local = 42; + int* p = &local; + void** arr = new void*[2]; + arr[idx] = p; + return arr; +} // no-warning False Negative + +void** returned_arr_of_ptr_callee(int idx) { + int local = 42; + int* p = &local; + void** arr = new void*[2]; + arr[idx] = p; + return arr; +} // no-warning False Negative + +void returned_arr_of_ptr_caller(int idx) { + void** arr = returned_arr_of_ptr_callee(idx); + (void)arr[idx]; +} + +void* global_aop[2]; + +void global_arr_of_ptr(int idx) { + int local = 42; + int* p = &local; + global_aop[idx] = p; +} // expected-warning{{local variable 'local' is still referred to by the global variable 'global_aop'}} + +void static_arr_of_ptr(int idx) { + int local = 42; + static void* arr[2]; + arr[idx] = &local; + (void)arr[idx]; +} // expected-warning{{local variable 'local' is still referred to by the static variable 'arr'}} + +void param_arr_of_ptr_top(void* arr[2], int idx) { + int local = 42; + int* p = &local; + arr[idx] = p; // no-warning FIXME +} + +void param_arr_of_ptr_callee(void* arr[2], int idx) { + int local = 42; + int* p = &local; + arr[idx] = p; // expected-warning{{local variable 'local' is still referred to by the stack variable 'arrStack'}} +} + +void param_arr_of_ptr_caller(int idx) { + void* arrStack[2]; + param_arr_of_ptr_callee(arrStack, idx); + (void)arrStack[idx]; +} +} // namespace leaking_via_arr_of_ptr_dynamic_idx + +namespace leaking_via_struct_with_ptr { +struct S { + int* p; +}; + +S returned_struct_with_ptr_top() { + int local = 42; + S s; + s.p = &local; + return s; +} // no-warning False Negative, requires traversing returned LazyCompoundVals + +S returned_struct_with_ptr_callee() { + int local = 42; + S s; + s.p = &local; + return s; // expected-warning{{'local' is still referred to by the stack variable 's'}} +} + +void returned_struct_with_ptr_caller() { + S s = returned_struct_with_ptr_callee(); + (void)s.p; +} + +S global_s; + +void global_struct_with_ptr() { + int local = 42; + global_s.p = &local; +} // expected-warning{{'local' is still referred to by the global variable 'global_s'}} + +void static_struct_with_ptr() { + int local = 42; + static S s; + s.p = &local; + (void)s.p; +} // expected-warning{{'local' is still referred to by the static variable 's'}} +} // namespace leaking_via_struct_with_ptr + +namespace leaking_via_ref_to_struct_with_ptr { +struct S { + int* p; +}; + +S &global_s = *(new S); + +void global_ref_to_struct_with_ptr() { + int local = 42; + global_s.p = &local; // no-warning FIXME +} + +void static_ref_to_struct_with_ptr() { + int local = 42; + static S &s = *(new S); + s.p = &local; + (void)s.p; +} // no-warning False Negative, requires relating multiple bindings to cross a heap region. 
+ +void param_ref_to_struct_with_ptr_top(S &s) { + int local = 42; + s.p = &local; // no-warning FIXME +} + +void param_ref_to_struct_with_ptr_callee(S &s) { + int local = 42; + s.p = &local; // expected-warning{{'local' is still referred to by the stack variable 'sStack'}} +} + +void param_ref_to_struct_with_ptr_caller() { + S sStack; + param_ref_to_struct_with_ptr_callee(sStack); +} + +template +void lambda_param_capture_callee(Callable& callee) { + int local = 42; + callee(local); // expected-warning{{'local' is still referred to by the stack variable 'p'}} +} + +void lambda_param_capture_caller() { + int* p = nullptr; + auto capt = [&p](int& param) { + p = ¶m; + }; + lambda_param_capture_callee(capt); +} +} // namespace leaking_via_ref_to_struct_with_ptr + +namespace leaking_via_ptr_to_struct_with_ptr { +struct S { + int* p; +}; + +S* returned_ptr_to_struct_with_ptr_top() { + int local = 42; + S* s = new S; + s->p = &local; + return s; +} // no-warning False Negative + +S* returned_ptr_to_struct_with_ptr_callee() { + int local = 42; + S* s = new S; + s->p = &local; + return s; +} // no-warning False Negative + +void returned_ptr_to_struct_with_ptr_caller() { + S* s = returned_ptr_to_struct_with_ptr_callee(); + (void)s->p; +} + +S* global_s; + +void global_ptr_to_struct_with_ptr() { + int local = 42; + global_s->p = &local; // no-warning FIXME +} + +void static_ptr_to_struct_with_ptr_new() { + int local = 42; + static S* s = new S; + s->p = &local; + (void)s->p; +} // no-warning False Negative, requires relating multiple bindings to cross a heap region. + +S* get_some_s(); + +void static_ptr_to_struct_with_ptr_generated() { + int local = 42; + static S* s = get_some_s(); + s->p = &local; +} // no-warning False Negative, requires relating multiple bindings to cross the invented pointer. + +void param_ptr_to_struct_with_ptr_top(S* s) { + int local = 42; + s->p = &local; // no-warning FIXME +} + +void param_ptr_to_struct_with_ptr_callee(S* s) { + int local = 42; + s->p = &local; // expected-warning{{'local' is still referred to by the stack variable 's'}} +} + +void param_ptr_to_struct_with_ptr_caller() { + S s; + param_ptr_to_struct_with_ptr_callee(&s); + (void)s.p; +} +} // namespace leaking_via_ptr_to_struct_with_ptr + +namespace leaking_via_arr_of_struct_with_ptr { +struct S { + int* p; +}; + +S* returned_ptr_to_struct_with_ptr_top() { + int local = 42; + S* s = new S[2]; + s[1].p = &local; + return s; +} // no-warning False Negative + +S* returned_ptr_to_struct_with_ptr_callee() { + int local = 42; + S* s = new S[2]; + s[1].p = &local; + return s; +} // no-warning False Negative + +void returned_ptr_to_struct_with_ptr_caller() { + S* s = returned_ptr_to_struct_with_ptr_callee(); + (void)s[1].p; +} + +S global_s[2]; + +void global_ptr_to_struct_with_ptr() { + int local = 42; + global_s[1].p = &local; +} // expected-warning{{'local' is still referred to by the global variable 'global_s'}} + +void static_ptr_to_struct_with_ptr_new() { + int local = 42; + static S* s = new S[2]; + s[1].p = &local; + (void)s[1].p; +} + +S* get_some_s(); + +void static_ptr_to_struct_with_ptr_generated() { + int local = 42; + static S* s = get_some_s(); + s[1].p = &local; +} // no-warning False Negative, requires relating multiple bindings to cross the invented pointer. 
+ +void param_ptr_to_struct_with_ptr_top(S s[2]) { + int local = 42; + s[1].p = &local; // no-warning FIXME +} + +void param_ptr_to_struct_with_ptr_callee(S s[2]) { + int local = 42; + s[1].p = &local; // expected-warning{{'local' is still referred to by the stack variable 's'}} +} + +void param_ptr_to_struct_with_ptr_caller() { + S s[2]; + param_ptr_to_struct_with_ptr_callee(s); + (void)s[1].p; +} +} // namespace leaking_via_arr_of_struct_with_ptr + +namespace leaking_via_nested_and_indirect { +struct NestedAndTransitive { + int** p; + NestedAndTransitive* next[3]; +}; + +NestedAndTransitive global_nat; + +void global_nested_and_transitive() { + int local = 42; + *global_nat.next[2]->next[1]->p = &local; // no-warning FIXME +} + +void param_nested_and_transitive_top(NestedAndTransitive* nat) { + int local = 42; + *nat->next[2]->next[1]->p = &local; // no-warning FIXME +} + +void param_nested_and_transitive_callee(NestedAndTransitive* nat) { + int local = 42; + *nat->next[2]->next[1]->p = &local; // no-warning FIXME +} + +void param_nested_and_transitive_caller(NestedAndTransitive natCaller) { + param_nested_and_transitive_callee(&natCaller); +} + +} // namespace leaking_via_nested_and_indirect + +namespace leaking_as_member { +class CRef { + int& ref; // expected-note{{reference member declared here}} + CRef(int x) : ref(x) {} + // expected-warning@-1 {{binding reference member 'ref' to stack allocated parameter 'x'}} +}; + +class CPtr { + int* ptr; + void memFun(int x) { + ptr = &x; + } +}; +} // namespace leaking_as_member From 54eb89fe74b31da9154c60923c01df77389b0d89 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Mon, 26 Aug 2024 07:29:00 -0400 Subject: [PATCH 02/65] [flang][NFC] AliasAnalysis: Prepare for PR #94242 (#105899) This PR extracts several small NFC changes from PR #94242 to make it more readable. --- .../flang/Optimizer/Analysis/AliasAnalysis.h | 18 +++++----- .../lib/Optimizer/Analysis/AliasAnalysis.cpp | 35 ++++++++++--------- ...alias-analysis-9.fir => ptr-component.fir} | 0 3 files changed, 28 insertions(+), 25 deletions(-) rename flang/test/Analysis/AliasAnalysis/{alias-analysis-9.fir => ptr-component.fir} (100%) diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h index 8cb6e92e41d97d..9a70b7fbfad2b6 100644 --- a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h +++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h @@ -153,20 +153,11 @@ struct AliasAnalysis { /// Return true, if Target or Pointer attribute is set. bool isTargetOrPointer() const; - /// Return true, if the memory source's `valueType` is a reference type - /// to an object of derived type that contains a component with POINTER - /// attribute. - bool isRecordWithPointerComponent() const; - bool isDummyArgument() const; bool isData() const; bool isBoxData() const; mlir::Type getType() const; - - /// Return true, if `ty` is a reference type to a boxed - /// POINTER object or a raw fir::PointerType. - static bool isPointerReference(mlir::Type ty); }; friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os, @@ -183,6 +174,15 @@ struct AliasAnalysis { /// will stop at [hl]fir.declare if it represents a dummy /// argument declaration (i.e. it has the dummy_scope operand). Source getSource(mlir::Value, bool getInstantiationPoint = false); + +private: + /// Return true, if `ty` is a reference type to an object of derived type + /// that contains a component with POINTER attribute. 
+ static bool isRecordWithPointerComponent(mlir::Type ty); + + /// Return true, if `ty` is a reference type to a boxed + /// POINTER object or a raw fir::PointerType. + static bool isPointerReference(mlir::Type ty); }; inline bool operator==(const AliasAnalysis::Source::SourceOrigin &lhs, diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index 2084962fde729a..e88da5a8ebae19 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -60,7 +60,15 @@ void AliasAnalysis::Source::print(llvm::raw_ostream &os) const { attributes.Dump(os, EnumToString); } -bool AliasAnalysis::Source::isPointerReference(mlir::Type ty) { +bool AliasAnalysis::isRecordWithPointerComponent(mlir::Type ty) { + auto eleTy = fir::dyn_cast_ptrEleTy(ty); + if (!eleTy) + return false; + // TO DO: Look for pointer components + return mlir::isa(eleTy); +} + +bool AliasAnalysis::isPointerReference(mlir::Type ty) { auto eleTy = fir::dyn_cast_ptrEleTy(ty); if (!eleTy) return false; @@ -86,15 +94,7 @@ bool AliasAnalysis::Source::isBoxData() const { origin.isData; } -bool AliasAnalysis::Source::isRecordWithPointerComponent() const { - auto eleTy = fir::dyn_cast_ptrEleTy(valueType); - if (!eleTy) - return false; - // TO DO: Look for pointer components - return mlir::isa(eleTy); -} - -AliasResult AliasAnalysis::alias(Value lhs, Value rhs) { +AliasResult AliasAnalysis::alias(mlir::Value lhs, mlir::Value rhs) { // TODO: alias() has to be aware of the function scopes. // After MLIR inlining, the current implementation may // not recognize non-aliasing entities. @@ -111,6 +111,7 @@ AliasResult AliasAnalysis::alias(Value lhs, Value rhs) { // it aliases with everything if (lhsSrc.kind >= SourceKind::Indirect || rhsSrc.kind >= SourceKind::Indirect) { + LLVM_DEBUG(llvm::dbgs() << " aliasing because of indirect access\n"); return AliasResult::MayAlias; } @@ -169,10 +170,12 @@ AliasResult AliasAnalysis::alias(Value lhs, Value rhs) { // Box for POINTER component inside an object of a derived type // may alias box of a POINTER object, as well as boxes for POINTER // components inside two objects of derived types may alias. 
- if ((src1->isRecordWithPointerComponent() && src2->isTargetOrPointer()) || - (src2->isRecordWithPointerComponent() && src1->isTargetOrPointer()) || - (src1->isRecordWithPointerComponent() && - src2->isRecordWithPointerComponent())) { + if ((isRecordWithPointerComponent(src1->valueType) && + src2->isTargetOrPointer()) || + (isRecordWithPointerComponent(src2->valueType) && + src1->isTargetOrPointer()) || + (isRecordWithPointerComponent(src1->valueType) && + isRecordWithPointerComponent(src2->valueType))) { LLVM_DEBUG(llvm::dbgs() << " aliasing because of pointer components\n"); return AliasResult::MayAlias; } @@ -310,7 +313,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, // TODO: Take followBoxData into account when setting the pointer // attribute - if (Source::isPointerReference(ty)) + if (isPointerReference(ty)) attributes.set(Attribute::Pointer); global = llvm::cast(op).getSymbol(); breakFromLoop = true; @@ -387,7 +390,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, if (fir::valueHasFirAttribute(v, fir::getTargetAttrName())) attributes.set(Attribute::Target); - if (Source::isPointerReference(ty)) + if (isPointerReference(ty)) attributes.set(Attribute::Pointer); } diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-9.fir b/flang/test/Analysis/AliasAnalysis/ptr-component.fir similarity index 100% rename from flang/test/Analysis/AliasAnalysis/alias-analysis-9.fir rename to flang/test/Analysis/AliasAnalysis/ptr-component.fir From 216ba6bc6c0d1b65543771420897f4d09beec704 Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Mon, 26 Aug 2024 08:39:32 -0300 Subject: [PATCH 03/65] [flang][OpenMP] Privatize vars referenced in statement functions (#103390) Variables referenced in the body of statement functions need to be handled as if they are explicitly referenced. Otherwise, they are skipped during implicit privatization, because statement functions are represented as procedures in the parse tree. To avoid missing symbols referenced only in statement functions during implicit privatization, new symbols, associated with them, are created and inserted into the context of the directive that privatizes them. They are later collected and processed in lowering. To avoid confusing these new symbols with regular ones, they are tagged with the new OmpFromStmtFunction flag. 
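For illustration, a minimal sketch of the pattern being fixed (it mirrors the test_implicit_use case in the statement-function.f90 test added by this patch; all names come from that test):

```
subroutine test_implicit_use()
  implicit none
  integer :: iexp, iimp
  integer, external :: ifun
  integer :: sf

  ! 'iimp' is referenced only inside the statement function body.
  sf(iexp) = ifun(iimp) + iexp

  !$omp parallel default(firstprivate)
  ! Without the new OmpFromStmtFunction symbols, 'iimp' would be missed
  ! during implicit privatization of this construct.
  iexp = sf(iexp)
  !$omp end parallel
end subroutine
```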
Fixes https://github.com/llvm/llvm-project/issues/74273 --- flang/include/flang/Semantics/symbol.h | 2 +- .../lib/Lower/OpenMP/DataSharingProcessor.cpp | 9 + flang/lib/Semantics/resolve-directives.cpp | 313 ++++++++++-------- .../test/Lower/OpenMP/statement-function.f90 | 43 +++ 4 files changed, 227 insertions(+), 140 deletions(-) create mode 100644 flang/test/Lower/OpenMP/statement-function.f90 diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index cf0350735b5b94..b4db6689a94271 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -755,7 +755,7 @@ class Symbol { OmpDeclarativeAllocateDirective, OmpExecutableAllocateDirective, OmpDeclareSimd, OmpDeclareTarget, OmpThreadprivate, OmpDeclareReduction, OmpFlushed, OmpCriticalLock, OmpIfSpecified, OmpNone, OmpPreDetermined, - OmpImplicit); + OmpImplicit, OmpFromStmtFunction); using Flags = common::EnumSet; const Scope &owner() const { return *owner_; } diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index e1a193edc004a7..1b2f926e21bed8 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -402,6 +402,15 @@ void DataSharingProcessor::collectSymbols( /*collectSymbols=*/true, /*collectHostAssociatedSymbols=*/true); + // Add implicitly referenced symbols from statement functions. + if (curScope) { + for (const auto &sym : curScope->GetSymbols()) { + if (sym->test(semantics::Symbol::Flag::OmpFromStmtFunction) && + sym->test(flag)) + allSymbols.insert(&*sym); + } + } + llvm::SetVector symbolsInNestedRegions; collectSymbolsInNestedRegions(eval, flag, symbolsInNestedRegions); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index cc9f1cc7ed2691..4aecb8b8e7b479 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -91,11 +91,12 @@ template class DirectiveAttributeVisitor { void SetContextAssociatedLoopLevel(std::int64_t level) { GetContext().associatedLoopLevel = level; } - Symbol &MakeAssocSymbol(const SourceName &name, Symbol &prev, Scope &scope) { + Symbol &MakeAssocSymbol( + const SourceName &name, const Symbol &prev, Scope &scope) { const auto pair{scope.try_emplace(name, Attrs{}, HostAssocDetails{prev})}; return *pair.first->second; } - Symbol &MakeAssocSymbol(const SourceName &name, Symbol &prev) { + Symbol &MakeAssocSymbol(const SourceName &name, const Symbol &prev) { return MakeAssocSymbol(name, prev, currScope()); } void AddDataSharingAttributeObject(SymbolRef object) { @@ -108,6 +109,7 @@ template class DirectiveAttributeVisitor { const parser::Name *GetLoopIndex(const parser::DoConstruct &); const parser::DoConstruct *GetDoConstructIf( const parser::ExecutionPartConstruct &); + Symbol *DeclareNewPrivateAccessEntity(const Symbol &, Symbol::Flag, Scope &); Symbol *DeclarePrivateAccessEntity( const parser::Name &, Symbol::Flag, Scope &); Symbol *DeclarePrivateAccessEntity(Symbol &, Symbol::Flag, Scope &); @@ -736,6 +738,9 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { std::optional); void IssueNonConformanceWarning( llvm::omp::Directive D, parser::CharBlock source); + + void CreateImplicitSymbols( + const Symbol *symbol, std::optional setFlag = std::nullopt); }; template @@ -771,6 +776,19 @@ const parser::DoConstruct *DirectiveAttributeVisitor::GetDoConstructIf( return parser::Unwrap(x); } +template +Symbol 
*DirectiveAttributeVisitor::DeclareNewPrivateAccessEntity( + const Symbol &object, Symbol::Flag flag, Scope &scope) { + assert(object.owner() != currScope()); + auto &symbol{MakeAssocSymbol(object.name(), object, scope)}; + symbol.set(flag); + if (flag == Symbol::Flag::OmpCopyIn) { + // The symbol in copyin clause must be threadprivate entity. + symbol.set(Symbol::Flag::OmpThreadprivate); + } + return &symbol; +} + template Symbol *DirectiveAttributeVisitor::DeclarePrivateAccessEntity( const parser::Name &name, Symbol::Flag flag, Scope &scope) { @@ -785,13 +803,7 @@ template Symbol *DirectiveAttributeVisitor::DeclarePrivateAccessEntity( Symbol &object, Symbol::Flag flag, Scope &scope) { if (object.owner() != currScope()) { - auto &symbol{MakeAssocSymbol(object.name(), object, scope)}; - symbol.set(flag); - if (flag == Symbol::Flag::OmpCopyIn) { - // The symbol in copyin clause must be threadprivate entity. - symbol.set(Symbol::Flag::OmpThreadprivate); - } - return &symbol; + return DeclareNewPrivateAccessEntity(object, flag, scope); } else { object.set(flag); return &object; @@ -2031,24 +2043,152 @@ void OmpAttributeVisitor::Post(const parser::OpenMPAllocatorsConstruct &x) { PopContext(); } +static bool IsPrivatizable(const Symbol *sym) { + auto *misc{sym->detailsIf()}; + return !IsProcedure(*sym) && !IsNamedConstant(*sym) && + !sym->owner().IsDerivedType() && + sym->owner().kind() != Scope::Kind::ImpliedDos && + !sym->detailsIf() && + !sym->detailsIf() && + (!misc || + (misc->kind() != MiscDetails::Kind::ComplexPartRe && + misc->kind() != MiscDetails::Kind::ComplexPartIm && + misc->kind() != MiscDetails::Kind::KindParamInquiry && + misc->kind() != MiscDetails::Kind::LenParamInquiry && + misc->kind() != MiscDetails::Kind::ConstructName)); +} + +void OmpAttributeVisitor::CreateImplicitSymbols( + const Symbol *symbol, std::optional setFlag) { + if (!IsPrivatizable(symbol)) { + return; + } + + // Implicitly determined DSAs + // OMP 5.2 5.1.1 - Variables Referenced in a Construct + Symbol *lastDeclSymbol = nullptr; + std::optional prevDSA; + for (int dirDepth{0}; dirDepth < (int)dirContext_.size(); ++dirDepth) { + DirContext &dirContext = dirContext_[dirDepth]; + std::optional dsa; + + for (auto symMap : dirContext.objectWithDSA) { + // if the `symbol` already has a data-sharing attribute + if (symMap.first->name() == symbol->name()) { + dsa = symMap.second; + break; + } + } + + // When handling each implicit rule for a given symbol, one of the + // following 3 actions may be taken: + // 1. Declare a new private symbol. + // 2. Create a new association symbol with no flags, that will represent + // a shared symbol in the current scope. Note that symbols without + // any private flags are considered as shared. + // 3. Use the last declared private symbol, by inserting a new symbol + // in the scope being processed, associated with it. + // If no private symbol was declared previously, then no association + // is needed and the symbol from the enclosing scope will be + // inherited by the current one. + // + // Because of how symbols are collected in lowering, not inserting a new + // symbol in the last case could lead to the conclusion that a symbol + // from an enclosing construct was declared in the current construct, + // which would result in wrong privatization code being generated. + // Consider the following example: + // + // !$omp parallel default(private) ! p1 + // !$omp parallel default(private) shared(x) ! 
p2 + // x = 10 + // !$omp end parallel + // !$omp end parallel + // + // If a new x symbol was not inserted in the inner parallel construct + // (p2), it would use the x symbol definition from the enclosing scope. + // Then, when p2's default symbols were collected in lowering, the x + // symbol from the outer parallel construct (p1) would be collected, as + // it would have the private flag set. + // This would make x appear to be defined in p2, causing it to be + // privatized in p2 and its privatization in p1 to be skipped. + auto makePrivateSymbol = [&](Symbol::Flag flag) { + const Symbol *hostSymbol = + lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); + lastDeclSymbol = DeclareNewPrivateAccessEntity( + *hostSymbol, flag, context_.FindScope(dirContext.directiveSource)); + if (setFlag) { + lastDeclSymbol->set(*setFlag); + } + return lastDeclSymbol; + }; + auto makeSharedSymbol = [&]() { + const Symbol *hostSymbol = + lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); + MakeAssocSymbol(symbol->name(), *hostSymbol, + context_.FindScope(dirContext.directiveSource)); + }; + auto useLastDeclSymbol = [&]() { + if (lastDeclSymbol) { + makeSharedSymbol(); + } + }; + + bool taskGenDir = llvm::omp::taskGeneratingSet.test(dirContext.directive); + bool targetDir = llvm::omp::allTargetSet.test(dirContext.directive); + bool parallelDir = llvm::omp::allParallelSet.test(dirContext.directive); + bool teamsDir = llvm::omp::allTeamsSet.test(dirContext.directive); + + if (dsa.has_value()) { + if (dsa.value() == Symbol::Flag::OmpShared && + (parallelDir || taskGenDir || teamsDir)) + makeSharedSymbol(); + // Private symbols will have been declared already. + prevDSA = dsa; + continue; + } + + if (dirContext.defaultDSA == Symbol::Flag::OmpPrivate || + dirContext.defaultDSA == Symbol::Flag::OmpFirstPrivate || + dirContext.defaultDSA == Symbol::Flag::OmpShared) { + // 1) default + // Allowed only with parallel, teams and task generating constructs. 
+ assert(parallelDir || taskGenDir || teamsDir); + if (dirContext.defaultDSA != Symbol::Flag::OmpShared) + makePrivateSymbol(dirContext.defaultDSA); + else + makeSharedSymbol(); + dsa = dirContext.defaultDSA; + } else if (parallelDir) { + // 2) parallel -> shared + makeSharedSymbol(); + dsa = Symbol::Flag::OmpShared; + } else if (!taskGenDir && !targetDir) { + // 3) enclosing context + useLastDeclSymbol(); + dsa = prevDSA; + } else if (targetDir) { + // TODO 4) not mapped target variable -> firstprivate + dsa = prevDSA; + } else if (taskGenDir) { + // TODO 5) dummy arg in orphaned taskgen construct -> firstprivate + if (prevDSA == Symbol::Flag::OmpShared) { + // 6) shared in enclosing context -> shared + makeSharedSymbol(); + dsa = Symbol::Flag::OmpShared; + } else { + // 7) firstprivate + dsa = Symbol::Flag::OmpFirstPrivate; + makePrivateSymbol(*dsa)->set(Symbol::Flag::OmpImplicit); + } + } + prevDSA = dsa; + } +} + // For OpenMP constructs, check all the data-refs within the constructs // and adjust the symbol for each Name if necessary void OmpAttributeVisitor::Post(const parser::Name &name) { auto *symbol{name.symbol}; - auto IsPrivatizable = [](const Symbol *sym) { - auto *misc{sym->detailsIf()}; - return !IsProcedure(*sym) && !IsNamedConstant(*sym) && - !sym->owner().IsDerivedType() && - sym->owner().kind() != Scope::Kind::ImpliedDos && - !sym->detailsIf() && - !sym->detailsIf() && - (!misc || - (misc->kind() != MiscDetails::Kind::ComplexPartRe && - misc->kind() != MiscDetails::Kind::ComplexPartIm && - misc->kind() != MiscDetails::Kind::KindParamInquiry && - misc->kind() != MiscDetails::Kind::LenParamInquiry && - misc->kind() != MiscDetails::Kind::ConstructName)); - }; if (symbol && !dirContext_.empty() && GetContext().withinConstruct) { if (IsPrivatizable(symbol) && !IsObjectWithDSA(*symbol)) { @@ -2076,125 +2216,20 @@ void OmpAttributeVisitor::Post(const parser::Name &name) { if (found->test(semantics::Symbol::Flag::OmpThreadprivate)) return; } - if (!IsPrivatizable(symbol)) { - return; - } - - // Implicitly determined DSAs - // OMP 5.2 5.1.1 - Variables Referenced in a Construct - Symbol *lastDeclSymbol = nullptr; - std::optional prevDSA; - for (int dirDepth{0}; dirDepth < (int)dirContext_.size(); ++dirDepth) { - DirContext &dirContext = dirContext_[dirDepth]; - std::optional dsa; - for (auto symMap : dirContext.objectWithDSA) { - // if the `symbol` already has a data-sharing attribute - if (symMap.first->name() == name.symbol->name()) { - dsa = symMap.second; - break; - } - } - - // When handling each implicit rule for a given symbol, one of the - // following 3 actions may be taken: - // 1. Declare a new private symbol. - // 2. Create a new association symbol with no flags, that will represent - // a shared symbol in the current scope. Note that symbols without - // any private flags are considered as shared. - // 3. Use the last declared private symbol, by inserting a new symbol - // in the scope being processed, associated with it. - // If no private symbol was declared previously, then no association - // is needed and the symbol from the enclosing scope will be - // inherited by the current one. - // - // Because of how symbols are collected in lowering, not inserting a new - // symbol in the last case could lead to the conclusion that a symbol - // from an enclosing construct was declared in the current construct, - // which would result in wrong privatization code being generated. - // Consider the following example: - // - // !$omp parallel default(private) ! 
p1 - // !$omp parallel default(private) shared(x) ! p2 - // x = 10 - // !$omp end parallel - // !$omp end parallel - // - // If a new x symbol was not inserted in the inner parallel construct - // (p2), it would use the x symbol definition from the enclosing scope. - // Then, when p2's default symbols were collected in lowering, the x - // symbol from the outer parallel construct (p1) would be collected, as - // it would have the private flag set. - // This would make x appear to be defined in p2, causing it to be - // privatized in p2 and its privatization in p1 to be skipped. - auto makePrivateSymbol = [&](Symbol::Flag flag) { - Symbol *hostSymbol = - lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); - lastDeclSymbol = DeclarePrivateAccessEntity( - *hostSymbol, flag, context_.FindScope(dirContext.directiveSource)); - return lastDeclSymbol; - }; - auto makeSharedSymbol = [&]() { - Symbol *hostSymbol = - lastDeclSymbol ? lastDeclSymbol : &symbol->GetUltimate(); - MakeAssocSymbol(symbol->name(), *hostSymbol, - context_.FindScope(dirContext.directiveSource)); - }; - auto useLastDeclSymbol = [&]() { - if (lastDeclSymbol) - MakeAssocSymbol(symbol->name(), *lastDeclSymbol, - context_.FindScope(dirContext.directiveSource)); - }; - - bool taskGenDir = llvm::omp::taskGeneratingSet.test(dirContext.directive); - bool targetDir = llvm::omp::allTargetSet.test(dirContext.directive); - bool parallelDir = llvm::omp::allParallelSet.test(dirContext.directive); - bool teamsDir = llvm::omp::allTeamsSet.test(dirContext.directive); - - if (dsa.has_value()) { - if (dsa.value() == Symbol::Flag::OmpShared && - (parallelDir || taskGenDir || teamsDir)) - makeSharedSymbol(); - // Private symbols will have been declared already. - prevDSA = dsa; - continue; - } - - if (dirContext.defaultDSA == Symbol::Flag::OmpPrivate || - dirContext.defaultDSA == Symbol::Flag::OmpFirstPrivate || - dirContext.defaultDSA == Symbol::Flag::OmpShared) { - // 1) default - // Allowed only with parallel, teams and task generating constructs. - assert(parallelDir || taskGenDir || teamsDir); - if (dirContext.defaultDSA != Symbol::Flag::OmpShared) - makePrivateSymbol(dirContext.defaultDSA); - else - makeSharedSymbol(); - dsa = dirContext.defaultDSA; - } else if (parallelDir) { - // 2) parallel -> shared - makeSharedSymbol(); - dsa = Symbol::Flag::OmpShared; - } else if (!taskGenDir && !targetDir) { - // 3) enclosing context - useLastDeclSymbol(); - dsa = prevDSA; - } else if (targetDir) { - // TODO 4) not mapped target variable -> firstprivate - dsa = prevDSA; - } else if (taskGenDir) { - // TODO 5) dummy arg in orphaned taskgen construct -> firstprivate - if (prevDSA == Symbol::Flag::OmpShared) { - // 6) shared in enclosing context -> shared - makeSharedSymbol(); - dsa = Symbol::Flag::OmpShared; - } else { - // 7) firstprivate - dsa = Symbol::Flag::OmpFirstPrivate; - makePrivateSymbol(*dsa)->set(Symbol::Flag::OmpImplicit); + if (auto *stmtFunction{symbol->detailsIf()}; + stmtFunction && stmtFunction->stmtFunction()) { + // Each non-dummy argument from a statement function must be handled too, + // as if it was explicitly referenced. 
+ semantics::UnorderedSymbolSet symbols{ + CollectSymbols(stmtFunction->stmtFunction().value())}; + for (const auto &sym : symbols) { + if (!IsStmtFunctionDummy(sym) && !IsObjectWithDSA(*sym)) { + CreateImplicitSymbols(&*sym, Symbol::Flag::OmpFromStmtFunction); } } - prevDSA = dsa; + } else { + CreateImplicitSymbols(symbol); } } // within OpenMP construct } diff --git a/flang/test/Lower/OpenMP/statement-function.f90 b/flang/test/Lower/OpenMP/statement-function.f90 new file mode 100644 index 00000000000000..6cdbcb6e141c7e --- /dev/null +++ b/flang/test/Lower/OpenMP/statement-function.f90 @@ -0,0 +1,43 @@ +! Test privatization within OpenMP constructs containing statement functions. +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK-LABEL: func @_QPtest_implicit_use +!CHECK: %[[IEXP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_useEiexp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[IIMP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_useEiimp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.parallel private({{.*firstprivate.*}} %[[IEXP]]#0 -> %[[PRIV_IEXP:.*]] : !fir.ref, +!CHECK-SAME: {{.*firstprivate.*}} %[[IIMP]]#0 -> %[[PRIV_IIMP:.*]] : !fir.ref) +!CHECK: %{{.*}}:2 = hlfir.declare %[[PRIV_IEXP]] {uniq_name = "_QFtest_implicit_useEiexp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %{{.*}}:2 = hlfir.declare %[[PRIV_IIMP]] {uniq_name = "_QFtest_implicit_useEiimp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +subroutine test_implicit_use() + implicit none + integer :: iexp, iimp + integer, external :: ifun + integer :: sf + + sf(iexp)=ifun(iimp)+iexp + !$omp parallel default(firstprivate) + iexp = sf(iexp) + !$omp end parallel +end subroutine + +!CHECK-LABEL: func @_QPtest_implicit_use2 +!CHECK: %[[IEXP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_use2Eiexp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[IIMP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_use2Eiimp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.task +!CHECK: %[[PRIV_IEXP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_use2Eiexp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[TEMP0:.*]] = fir.load %[[IEXP]]#0 : !fir.ref +!CHECK: hlfir.assign %[[TEMP0]] to %[[PRIV_IEXP]]#0 temporary_lhs : i32, !fir.ref +!CHECK: %[[PRIV_IIMP:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest_implicit_use2Eiimp"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[TEMP1:.*]] = fir.load %[[IIMP]]#0 : !fir.ref +!CHECK: hlfir.assign %[[TEMP1]] to %[[PRIV_IIMP]]#0 temporary_lhs : i32, !fir.ref +subroutine test_implicit_use2() + implicit none + integer :: iexp, iimp + integer, external :: ifun + integer :: sf + + sf(iexp)=ifun(iimp) + !$omp task + iexp = sf(iexp) + !$omp end task +end subroutine From 3be955abbccaf1c9ca590834d5e5b27ab1f24b77 Mon Sep 17 00:00:00 2001 From: Luke Drummond Date: Mon, 26 Aug 2024 11:52:17 +0100 Subject: [PATCH 04/65] [NFC] Remove dead code There's an early exit branch a couple of lines earlier for `MVT == f64`. Convert to an assert rather than using the duplicate ternary here. This silences an opinionated static analyser that's been bugging me. 
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index cb3fbdb850c1ac..4cf7733a260ff0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -745,8 +745,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::copysign: return NElts * getFullRateInstrCost(); case Intrinsic::canonicalize: { - InstRate = - SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost(); + assert(SLT != MVT::f64); + InstRate = getFullRateInstrCost(); break; } case Intrinsic::uadd_sat: From 2f91e98120f168b7ded6cb34d546dba178515cc4 Mon Sep 17 00:00:00 2001 From: Anton Sidorenko Date: Mon, 26 Aug 2024 15:11:24 +0300 Subject: [PATCH 05/65] [RISCV] Mark symbols used in inline asm for relocations as referenced (#104925) Commit 5cd8d53cac00f taught RISCVMergeBaseOffset to handle inline asm, however there is at least one case not covered for the integrated assembler. In the example below, the compiler generates a pcrel relocation (mcmodel=medany) ``` volatile double double_val = 1.0; void foo() { asm volatile("fld f0, %0 \n\t" : : "m"(double_val) : "memory"); } ``` and fails with the following error ``` error: could not find corresponding %pcrel_hi | "fld f0, %0 \n\t" :1:2: note: instantiated into assembly here | fld f0, %pcrel_lo(.Lpcrel_hi0)(a0) ``` After transformations, the MachineFunction contains inline asm instructions that reference the '.Lpcrel_hi0' symbol but do not define it. ``` ... = AUIPC ...(riscv-pcrel-hi) @double_val, pre-instr-symbol INLINEASM &"fld f0, $0 \0A\09" ... target-flags(riscv-pcrel-lo) ``` So, when AsmParser processes 'fld', it has to create a new symbol, as '.Lpcrel_hi0' already exists but is not known to be referenced in inline asm. AsmParser avoids the conflict by renaming the symbol referenced by 'fld' to '.Lpcrel_hi00', which does not exist. The resulting erroneous asm: ``` .Lpcrel_hi0: auipc a0, %pcrel_hi(double_val) #APP fld ft0, %pcrel_lo(.Lpcrel_hi00)(a0) ``` This change adds symbols used in memory operands to the list of referenced ones. Godbolt link: https://godbolt.org/z/aqrrsWKoK -- on the left you can find incorrect labels for the integrated-as and on the right an error when compiling to the binary object.
--- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 9 + .../RISCV/inline-asm-mem-constraint.ll | 1188 ++++++----------- 2 files changed, 399 insertions(+), 798 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 93677433c04405..476dde2be39e57 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -396,6 +396,15 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, OS << MCO.getImm(); else if (Offset.isGlobal() || Offset.isBlockAddress() || Offset.isMCSymbol()) OS << *MCO.getExpr(); + + if (Offset.isMCSymbol()) + MMI->getContext().registerInlineAsmLabel(Offset.getMCSymbol()); + if (Offset.isBlockAddress()) { + const BlockAddress *BA = Offset.getBlockAddress(); + MCSymbol *Sym = GetBlockAddressSymbol(BA); + MMI->getContext().registerInlineAsmLabel(Sym); + } + OS << "(" << RISCVInstPrinter::getRegisterName(AddrReg.getReg()) << ")"; return false; } diff --git a/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll b/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll index e34df9b1c01f25..7fae0ca692669e 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll @@ -129,41 +129,23 @@ define void @constraint_m_with_global_1() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_1: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi0)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_1: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi0)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_1: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi0: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi00)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_global_1: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi0: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi0)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_1: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi0: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi00)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_global_1: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi0: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi0)(a0) +; 
RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) @eg) ret void } @@ -185,41 +167,23 @@ define void @constraint_m_with_global_2() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_2: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi1: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi1)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_2: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi1: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi1)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_2: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi1: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi110)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_global_2: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi1: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+4) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi1)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_2: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi1: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi110)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_global_2: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi1: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+4) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi1)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) getelementptr ([400000 x i32], ptr @eg, i32 0, i32 1)) ret void } @@ -241,41 +205,23 @@ define void @constraint_m_with_global_3() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_3: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi2: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi2)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_global_3: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi2: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi2)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; 
RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_3: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi2: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi210)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_global_3: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi2: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+8000) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi2)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_global_3: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi2: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi210)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_global_3: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi2: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+8000) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi2)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) getelementptr ([400000 x i32], ptr @eg, i32 0, i32 2000)) ret void } @@ -407,89 +353,47 @@ define void @constraint_m_with_extern_weak_global_3() nounwind { } define void @constraint_m_with_local_1() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp0) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp0)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp0) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp0)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi6: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi6)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi6: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi6)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; 
RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp0) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp00)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_m_with_local_1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp0: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp0) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp0)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp0) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp00)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_m_with_local_1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp0: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp0) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp0)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi6: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi60)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_local_1: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp0: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi6: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp0) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi6)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_1: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp0: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi6: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi60)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_local_1: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp0: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi6: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp0) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi6)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -499,89 +403,47 @@ label: } define void @constraint_m_with_local_2() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; 
RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp1+4) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp1+4)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp1+4) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp1+4)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi7: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi7)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi7: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi7)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp1+4) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp10+4)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_m_with_local_2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp1: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp1+4) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp1+4)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp1+4) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp10+4)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_m_with_local_2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp1: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp1+4) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp1+4)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi7: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi70)(a0) -; 
RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_local_2: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp1: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi7: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi7)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_2: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp1: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi7: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi70)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_local_2: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp1: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi7: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp1+4) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi7)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -591,89 +453,47 @@ label: } define void @constraint_m_with_local_3() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp2+2000) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp2+2000)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp2+2000) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp2+2000)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi8: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi8)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi8: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi8)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV32I-INTEGRATED: # %bb.0: # %entry 
-; RV32I-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp2+2000) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp20+2000)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_m_with_local_3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp2: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp2+2000) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp2+2000)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp2+2000) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp20+2000)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_m_with_local_3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp2: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp2+2000) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp2+2000)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi8: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi80)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_local_3: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp2: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi8: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi8)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_local_3: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp2: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi8: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi80)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_local_3: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp2: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi8: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp2+2000) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi8)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -740,8 +560,8 @@ define void @constraint_m_with_multi_operands() nounwind { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi9: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi90)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi90)(a0) +; 
RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi9)(a0) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi9)(a0) ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: ret ; @@ -750,8 +570,8 @@ define void @constraint_m_with_multi_operands() nounwind { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi9: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi90)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi90)(a0) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi9)(a0) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi9)(a0) ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: ret call void asm "sw zero, $0; sw zero, $1", "=*m,=*m"(ptr elementtype(i32) @eg, ptr elementtype(i32) @eg) @@ -781,53 +601,29 @@ define void @constraint_m_with_multi_asm() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_multi_asm: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi10: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_m_with_multi_asm: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi10: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_multi_asm: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi10: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi100)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi100)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_m_with_multi_asm: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi10: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_m_with_multi_asm: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi10: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi100)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, 
%pcrel_lo(.Lpcrel_hi100)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_m_with_multi_asm: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi10: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi10)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) @eg) call void asm "sw zero, $0", "=*m"(ptr elementtype(i32) @eg) ret void @@ -935,8 +731,8 @@ define i32 @constraint_m_with_callbr_multi_operands(i32 %a) { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi11: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi111)(a1) -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi111)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi11)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi11)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB14_2 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal @@ -953,8 +749,8 @@ define i32 @constraint_m_with_callbr_multi_operands(i32 %a) { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi11: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi111)(a1) -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi111)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi11)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi11)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB14_2 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal @@ -1101,12 +897,12 @@ define i32 @constraint_m_with_multi_callbr_asm(i32 %a) { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi12: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi120)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi12)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB15_3 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal0 ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi120)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi12)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB15_3 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.2: # %normal1 @@ -1123,12 +919,12 @@ define i32 @constraint_m_with_multi_callbr_asm(i32 %a) { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi12: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi120)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi12)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB15_3 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal0 ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi120)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi12)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB15_3 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; 
RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.2: # %normal1 @@ -1262,41 +1058,23 @@ define void @constraint_o_with_global_1() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_1: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi13: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi13)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_1: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi13: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi13)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_1: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi13: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi130)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_global_1: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi13: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi13)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_1: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi13: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi130)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_global_1: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi13: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi13)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) @eg) ret void } @@ -1316,43 +1094,25 @@ define void @constraint_o_with_global_2() nounwind { ; RV64I-NEXT: #APP ; RV64I-NEXT: sw zero, %lo(eg+4)(a0) ; RV64I-NEXT: #NO_APP -; RV64I-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_2: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi14: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi14)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_2: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi14: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi14)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_2: -; 
RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi14: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi140)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_2: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi14: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+4) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi140)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_global_2: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi14: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+4) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi14)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret +; +; RV64I-MEDIUM-LABEL: constraint_o_with_global_2: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi14: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+4) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi14)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) getelementptr ([400000 x i32], ptr @eg, i32 0, i32 1)) ret void } @@ -1374,41 +1134,23 @@ define void @constraint_o_with_global_3() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_3: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi15: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi15)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_global_3: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi15: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi15)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_3: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi15: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi150)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_global_3: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi15: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+8000) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi15)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_global_3: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi15: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg+8000) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi150)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: 
constraint_o_with_global_3: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi15: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg+8000) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi15)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) getelementptr ([400000 x i32], ptr @eg, i32 0, i32 2000)) ret void } @@ -1562,53 +1304,29 @@ define void @constraint_o_with_multi_asm() nounwind { ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret ; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_multi_asm: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi19: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_multi_asm: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi19: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_multi_asm: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi19: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi190)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi190)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_multi_asm: +; RV32I-MEDIUM: # %bb.0: +; RV32I-MEDIUM-NEXT: .Lpcrel_hi19: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_multi_asm: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi19: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(eg) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi190)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi190)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_multi_asm: +; RV64I-MEDIUM: # %bb.0: +; RV64I-MEDIUM-NEXT: .Lpcrel_hi19: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(eg) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi19)(a0) +; 
RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) @eg) call void asm "sw zero, $0", "=*o"(ptr elementtype(i32) @eg) ret void @@ -1716,8 +1434,8 @@ define i32 @constraint_o_with_callbr_multi_operands(i32 %a) { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi20: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi200)(a1) -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi200)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi20)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi20)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB26_2 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal @@ -1734,8 +1452,8 @@ define i32 @constraint_o_with_callbr_multi_operands(i32 %a) { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi20: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi200)(a1) -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi200)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi20)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi20)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB26_2 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal @@ -1882,12 +1600,12 @@ define i32 @constraint_o_with_multi_callbr_asm(i32 %a) { ; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi21: ; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi211)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi21)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB27_3 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal0 ; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi211)(a1) +; RV32I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi21)(a1) ; RV32I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB27_3 ; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.2: # %normal1 @@ -1904,12 +1622,12 @@ define i32 @constraint_o_with_multi_callbr_asm(i32 %a) { ; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi21: ; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a1, %pcrel_hi(eg) ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi211)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi21)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB27_3 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %normal0 ; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi211)(a1) +; RV64I-MEDIUM-INTEGRATED-NEXT: sw zero, %pcrel_lo(.Lpcrel_hi21)(a1) ; RV64I-MEDIUM-INTEGRATED-NEXT: beqz a0, .LBB27_3 ; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP ; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.2: # %normal1 @@ -1934,89 +1652,47 @@ fail: } define void @constraint_o_with_local_1() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp3) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp3)(a0) -; 
RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp3) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp3)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi22: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp3) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi22)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi22: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp3) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi22)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp3) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp30)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_o_with_local_1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp3: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp3) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp3)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp3) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp30)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_o_with_local_1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp3: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp3) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp3)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi22: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp3) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi220)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_local_1: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: 
.Ltmp3: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi22: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp3) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi22)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_1: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp3: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi22: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp3) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi220)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_local_1: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp3: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi22: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp3) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi22)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -2026,89 +1702,47 @@ label: } define void @constraint_o_with_local_2() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp4+4) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp4+4)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV64I-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp4+4) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp4+4)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi23: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi23)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi23: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi23)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp4+4) -; RV32I-INTEGRATED-NEXT: #APP -; 
RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp40+4)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_o_with_local_2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp4: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp4+4) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp4+4)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp4+4) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp40+4)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_o_with_local_2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp4: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp4+4) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp4+4)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi23: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi230)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_local_2: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp4: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; RV32I-MEDIUM-NEXT: .Lpcrel_hi23: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi23)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_2: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp4: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi23: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi230)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_local_2: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp4: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi23: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp4+4) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi23)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label @@ -2118,89 +1752,47 @@ label: } define void @constraint_o_with_local_3() nounwind { -; RV32I-NO-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV32I-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-NO-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV32I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp5+2000) -; RV32I-NO-INTEGRATED-NEXT: #APP -; RV32I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp5+2000)(a0) -; RV32I-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-NO-INTEGRATED-NEXT: ret -; -; RV64I-NO-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV64I-NO-INTEGRATED: 
# %bb.0: # %entry -; RV64I-NO-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV64I-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-NO-INTEGRATED-NEXT: lui a0, %hi(.Ltmp5+2000) -; RV64I-NO-INTEGRATED-NEXT: #APP -; RV64I-NO-INTEGRATED-NEXT: lw zero, %lo(.Ltmp5+2000)(a0) -; RV64I-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-NO-INTEGRATED-NEXT: ret -; -; RV32I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV32I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi24: -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi24)(a0) -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV64I-MEDIUM-NO-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV64I-MEDIUM-NO-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: .Lpcrel_hi24: -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi24)(a0) -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-NO-INTEGRATED-NEXT: ret -; -; RV32I-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV32I-INTEGRATED: # %bb.0: # %entry -; RV32I-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV32I-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp5+2000) -; RV32I-INTEGRATED-NEXT: #APP -; RV32I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp50+2000)(a0) -; RV32I-INTEGRATED-NEXT: #NO_APP -; RV32I-INTEGRATED-NEXT: ret +; RV32I-LABEL: constraint_o_with_local_3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: .Ltmp5: # Block address taken +; RV32I-NEXT: # %bb.1: # %label +; RV32I-NEXT: lui a0, %hi(.Ltmp5+2000) +; RV32I-NEXT: #APP +; RV32I-NEXT: lw zero, %lo(.Ltmp5+2000)(a0) +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: ret ; -; RV64I-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV64I-INTEGRATED: # %bb.0: # %entry -; RV64I-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV64I-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-INTEGRATED-NEXT: lui a0, %hi(.Ltmp5+2000) -; RV64I-INTEGRATED-NEXT: #APP -; RV64I-INTEGRATED-NEXT: lw zero, %lo(.Ltmp50+2000)(a0) -; RV64I-INTEGRATED-NEXT: #NO_APP -; RV64I-INTEGRATED-NEXT: ret +; RV64I-LABEL: constraint_o_with_local_3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: .Ltmp5: # Block address taken +; RV64I-NEXT: # %bb.1: # %label +; RV64I-NEXT: lui a0, %hi(.Ltmp5+2000) +; RV64I-NEXT: #APP +; RV64I-NEXT: lw zero, %lo(.Ltmp5+2000)(a0) +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ret ; -; RV32I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV32I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV32I-MEDIUM-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV32I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV32I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi24: -; RV32I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) -; RV32I-MEDIUM-INTEGRATED-NEXT: #APP -; RV32I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi240)(a0) -; RV32I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV32I-MEDIUM-INTEGRATED-NEXT: ret +; RV32I-MEDIUM-LABEL: constraint_o_with_local_3: +; RV32I-MEDIUM: # %bb.0: # %entry +; RV32I-MEDIUM-NEXT: .Ltmp5: # Block address taken +; RV32I-MEDIUM-NEXT: # %bb.1: # %label +; 
RV32I-MEDIUM-NEXT: .Lpcrel_hi24: +; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) +; RV32I-MEDIUM-NEXT: #APP +; RV32I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi24)(a0) +; RV32I-MEDIUM-NEXT: #NO_APP +; RV32I-MEDIUM-NEXT: ret ; -; RV64I-MEDIUM-INTEGRATED-LABEL: constraint_o_with_local_3: -; RV64I-MEDIUM-INTEGRATED: # %bb.0: # %entry -; RV64I-MEDIUM-INTEGRATED-NEXT: .Ltmp5: # Block address taken -; RV64I-MEDIUM-INTEGRATED-NEXT: # %bb.1: # %label -; RV64I-MEDIUM-INTEGRATED-NEXT: .Lpcrel_hi24: -; RV64I-MEDIUM-INTEGRATED-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) -; RV64I-MEDIUM-INTEGRATED-NEXT: #APP -; RV64I-MEDIUM-INTEGRATED-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi240)(a0) -; RV64I-MEDIUM-INTEGRATED-NEXT: #NO_APP -; RV64I-MEDIUM-INTEGRATED-NEXT: ret +; RV64I-MEDIUM-LABEL: constraint_o_with_local_3: +; RV64I-MEDIUM: # %bb.0: # %entry +; RV64I-MEDIUM-NEXT: .Ltmp5: # Block address taken +; RV64I-MEDIUM-NEXT: # %bb.1: # %label +; RV64I-MEDIUM-NEXT: .Lpcrel_hi24: +; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.Ltmp5+2000) +; RV64I-MEDIUM-NEXT: #APP +; RV64I-MEDIUM-NEXT: lw zero, %pcrel_lo(.Lpcrel_hi24)(a0) +; RV64I-MEDIUM-NEXT: #NO_APP +; RV64I-MEDIUM-NEXT: ret entry: br label %label From 5afd39d6e4df7e1f4f8c6f7bb6e2cda775beed6a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 26 Aug 2024 14:33:00 +0200 Subject: [PATCH 06/65] [InstCombine] Add test for op of phi in loop (NFC) --- llvm/test/Transforms/InstCombine/phi.ll | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll index e03e45312687bc..673c8f6c9488d6 100644 --- a/llvm/test/Transforms/InstCombine/phi.ll +++ b/llvm/test/Transforms/InstCombine/phi.ll @@ -2714,3 +2714,31 @@ join: %cmp = icmp slt i32 %13, 0 ret i1 %cmp } + +define void @phi_op_in_loop(i1 %c, i32 %x) { +; CHECK-LABEL: @phi_op_in_loop( +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X:%.*]], [[IF]] ], [ 0, [[LOOP]] ] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[PHI]], 1 +; CHECK-NEXT: call void @use(i32 [[AND]]) +; CHECK-NEXT: br label [[LOOP]] +; + br label %loop + +loop: + br i1 %c, label %if, label %loop.latch + +if: + br label %loop.latch + +loop.latch: + %phi = phi i32 [ %x, %if ], [ 0, %loop ] + %and = and i32 %phi, 1 + call void @use(i32 %and) + br label %loop +} From 914fa6727f712966e4fdaa2f9a4a99ab3321d1d0 Mon Sep 17 00:00:00 2001 From: Kelvin Li Date: Mon, 26 Aug 2024 08:37:34 -0400 Subject: [PATCH 07/65] [flang] Add target=sparc check for big endian (NFC) (#105854) --- flang/test/Semantics/data08.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/test/Semantics/data08.f90 b/flang/test/Semantics/data08.f90 index 7e12a71d117728..84cd6d1c125dbb 100644 --- a/flang/test/Semantics/data08.f90 +++ b/flang/test/Semantics/data08.f90 @@ -1,5 +1,5 @@ ! RUN: %flang_fc1 -fdebug-dump-symbols -pedantic %s 2>&1 | FileCheck %s \ -! RUN: --check-prefixes=%if system-aix %{"CHECK","BE"%} \ +! RUN: --check-prefixes=%if target={{.*-aix.*|sparc.*}} %{"CHECK","BE"%} \ ! RUN: %else %{"CHECK","LE"%} ! 
CHECK: DATA statement value initializes 'jx' of type 'INTEGER(4)' with CHARACTER From 95b37a76493a1cd4b607f53f4318b5da5b5392f0 Mon Sep 17 00:00:00 2001 From: Arseniy Zaostrovnykh Date: Mon, 26 Aug 2024 14:38:54 +0200 Subject: [PATCH 08/65] [analyzer] Detect leak of a stack address through output arguments 2/3 (#105653) At this point, only functions called from other functions (i.e., not top-level) are covered. Top-level functions have a different exit sequence and will be handled by a subsequent change. CPP-4734 ------- This is the second of three commits constituting https://github.com/llvm/llvm-project/pull/105648 it must not be merged before https://github.com/llvm/llvm-project/pull/105652 --- .../Checkers/StackAddrEscapeChecker.cpp | 64 ++++++++++++++----- clang/test/Analysis/stack-addr-ps.cpp | 31 +++++++-- 2 files changed, 75 insertions(+), 20 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp index 2bd4ca4528de8b..dcf6801a73de2d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp @@ -288,6 +288,23 @@ void StackAddrEscapeChecker::checkPreStmt(const ReturnStmt *RS, EmitStackError(C, R, RetE); } +static const MemSpaceRegion *getStackOrGlobalSpaceRegion(const MemRegion *R) { + assert(R); + if (const auto *MemSpace = R->getMemorySpace()) { + if (const auto *SSR = MemSpace->getAs()) + return SSR; + if (const auto *GSR = MemSpace->getAs()) + return GSR; + } + // If R describes a lambda capture, it will be a symbolic region + // referring to a field region of another symbolic region. + if (const auto *SymReg = R->getBaseRegion()->getAs()) { + if (const auto *OriginReg = SymReg->getSymbol()->getOriginRegion()) + return getStackOrGlobalSpaceRegion(OriginReg); + } + return nullptr; +} + std::optional printReferrer(const MemRegion *Referrer) { assert(Referrer); const StringRef ReferrerMemorySpace = [](const MemSpaceRegion *Space) { @@ -297,20 +314,31 @@ std::optional printReferrer(const MemRegion *Referrer) { return "global"; assert(isa(Space)); return "stack"; - }(Referrer->getMemorySpace()); - - // We should really only have VarRegions here. - // Anything else is really surprising, and we should get notified if such - // ever happens. - const auto *ReferrerVar = dyn_cast(Referrer); - if (!ReferrerVar) { - assert(false && "We should have a VarRegion here"); - return std::nullopt; // Defensively skip this one. 
+ }(getStackOrGlobalSpaceRegion(Referrer)); + + while (!Referrer->canPrintPretty()) { + if (const auto *SymReg = dyn_cast(Referrer); + SymReg && SymReg->getSymbol()->getOriginRegion()) { + Referrer = SymReg->getSymbol()->getOriginRegion()->getBaseRegion(); + } else if (isa(Referrer)) { + // Skip members of a class, it is handled in CheckExprLifetime.cpp as + // warn_bind_ref_member_to_parameter or + // warn_init_ptr_member_to_parameter_addr + return std::nullopt; + } else { + Referrer->dump(); + assert(false && "Unexpected referrer region type."); + return std::nullopt; + } } - const std::string ReferrerVarName = - ReferrerVar->getDecl()->getDeclName().getAsString(); + assert(Referrer); + assert(Referrer->canPrintPretty()); - return (ReferrerMemorySpace + " variable '" + ReferrerVarName + "'").str(); + std::string buf; + llvm::raw_string_ostream os(buf); + os << ReferrerMemorySpace << " variable "; + Referrer->printPretty(os); + return buf; } void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, @@ -332,16 +360,20 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, /// referred by an other stack variable from different stack frame. bool checkForDanglingStackVariable(const MemRegion *Referrer, const MemRegion *Referred) { - const auto *ReferrerMemSpace = - Referrer->getMemorySpace()->getAs(); + const auto *ReferrerMemSpace = getStackOrGlobalSpaceRegion(Referrer); const auto *ReferredMemSpace = Referred->getMemorySpace()->getAs(); if (!ReferrerMemSpace || !ReferredMemSpace) return false; + const auto *ReferrerStackSpace = + ReferrerMemSpace->getAs(); + if (!ReferrerStackSpace) + return false; + if (ReferredMemSpace->getStackFrame() == PoppedFrame && - ReferrerMemSpace->getStackFrame()->isParentOf(PoppedFrame)) { + ReferrerStackSpace->getStackFrame()->isParentOf(PoppedFrame)) { V.emplace_back(Referrer, Referred); return true; } @@ -387,7 +419,7 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS, if (!BT_stackleak) BT_stackleak = std::make_unique(CheckNames[CK_StackAddrEscapeChecker], - "Stack address stored into global variable"); + "Stack address leaks outside of stack frame"); for (const auto &P : Cb.V) { const MemRegion *Referrer = P.first->getBaseRegion(); diff --git a/clang/test/Analysis/stack-addr-ps.cpp b/clang/test/Analysis/stack-addr-ps.cpp index 68ccc322bf2ef2..95a6e3cbd25c7c 100644 --- a/clang/test/Analysis/stack-addr-ps.cpp +++ b/clang/test/Analysis/stack-addr-ps.cpp @@ -1,7 +1,10 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s -Wno-undefined-bool-conversion +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify %s -Wno-undefined-bool-conversion typedef __INTPTR_TYPE__ intptr_t; +template +void clang_analyzer_dump(T x); + const int& g() { int s; return s; // expected-warning{{Address of stack memory associated with local variable 's' returned}} expected-warning{{reference to stack memory associated with local variable 's' returned}} @@ -321,7 +324,7 @@ void param_ptr_to_ptr_to_ptr_top(void*** ppp) { void param_ptr_to_ptr_to_ptr_callee(void*** ppp) { int local = 42; - **ppp = &local; // no-warning FIXME + **ppp = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'pp'}} } void param_ptr_to_ptr_to_ptr_caller(void** pp) { @@ -331,7 +334,7 @@ void param_ptr_to_ptr_to_ptr_caller(void** pp) { void lambda_to_context_ptr_to_ptr(int **pp) { auto lambda = [&] { int local = 42; - *pp = &local; // no-warning FIXME + *pp = &local; // expected-warning{{local 
variable 'local' is still referred to by the stack variable 'pp'}} }; lambda(); (void)*pp; @@ -734,7 +737,7 @@ void param_nested_and_transitive_top(NestedAndTransitive* nat) { void param_nested_and_transitive_callee(NestedAndTransitive* nat) { int local = 42; - *nat->next[2]->next[1]->p = &local; // no-warning FIXME + *nat->next[2]->next[1]->p = &local; // expected-warning{{local variable 'local' is still referred to by the stack variable 'natCaller'}} } void param_nested_and_transitive_caller(NestedAndTransitive natCaller) { @@ -757,3 +760,23 @@ class CPtr { } }; } // namespace leaking_as_member + +namespace origin_region_limitation { +void leaker(int ***leakerArg) { + int local; + clang_analyzer_dump(*leakerArg); // expected-warning{{&SymRegion{reg_$0}}} + // Incorrect message: 'arg', after it is reinitialized with value returned by 'tweak' + // is no longer relevant. + // The message must refer to 'original_arg' instead, but there is no easy way to + // connect the SymRegion stored in 'original_arg' and 'original_arg' as variable. + **leakerArg = &local; // expected-warning{{ 'local' is still referred to by the stack variable 'arg'}} +} + +int **tweak(); + +void foo(int **arg) { + int **original_arg = arg; + arg = tweak(); + leaker(&original_arg); +} +} // namespace origin_region_limitation From 1e5f336a72cc11ac0908dbe472c3a378c662053f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 26 Aug 2024 14:55:01 +0200 Subject: [PATCH 09/65] [SCCP] Run test with both ipsccp and sccp (NFC) --- llvm/test/Transforms/SCCP/range-attribute.ll | 115 +++++++++++++------ 1 file changed, 82 insertions(+), 33 deletions(-) diff --git a/llvm/test/Transforms/SCCP/range-attribute.ll b/llvm/test/Transforms/SCCP/range-attribute.ll index 209c5464ccf221..4a72c7e0d70c9c 100644 --- a/llvm/test/Transforms/SCCP/range-attribute.ll +++ b/llvm/test/Transforms/SCCP/range-attribute.ll @@ -1,18 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=ipsccp -S | FileCheck %s +; RUN: opt < %s -passes=ipsccp -S | FileCheck %s --check-prefixes=CHECK,IPSCCP +; RUN: opt < %s -passes=sccp -S | FileCheck %s --check-prefixes=CHECK,SCCP declare void @use(i1) declare i32 @get_i32() define void @range_attribute(i32 range(i32 0, 10) %v) { -; CHECK-LABEL: @range_attribute( -; CHECK-NEXT: call void @use(i1 true) -; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[V:%.*]], 9 -; CHECK-NEXT: call void @use(i1 [[C2]]) -; CHECK-NEXT: call void @use(i1 false) -; CHECK-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 -; CHECK-NEXT: call void @use(i1 [[C4]]) -; CHECK-NEXT: ret void +; IPSCCP-LABEL: @range_attribute( +; IPSCCP-NEXT: call void @use(i1 true) +; IPSCCP-NEXT: [[C2:%.*]] = icmp ult i32 [[V:%.*]], 9 +; IPSCCP-NEXT: call void @use(i1 [[C2]]) +; IPSCCP-NEXT: call void @use(i1 false) +; IPSCCP-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 +; IPSCCP-NEXT: call void @use(i1 [[C4]]) +; IPSCCP-NEXT: ret void +; +; SCCP-LABEL: @range_attribute( +; SCCP-NEXT: [[C1:%.*]] = icmp ult i32 [[V:%.*]], 10 +; SCCP-NEXT: call void @use(i1 [[C1]]) +; SCCP-NEXT: [[C2:%.*]] = icmp ult i32 [[V]], 9 +; SCCP-NEXT: call void @use(i1 [[C2]]) +; SCCP-NEXT: [[C3:%.*]] = icmp ugt i32 [[V]], 9 +; SCCP-NEXT: call void @use(i1 [[C3]]) +; SCCP-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 +; SCCP-NEXT: call void @use(i1 [[C4]]) +; SCCP-NEXT: ret void ; %c1 = icmp ult i32 %v, 10 call void @use(i1 %c1) @@ -26,8 +38,11 @@ define void @range_attribute(i32 range(i32 0, 10) %v) { } define i32 @range_attribute_single(i32 range(i32 0, 1) %v) { -; 
CHECK-LABEL: @range_attribute_single( -; CHECK-NEXT: ret i32 0 +; IPSCCP-LABEL: @range_attribute_single( +; IPSCCP-NEXT: ret i32 0 +; +; SCCP-LABEL: @range_attribute_single( +; SCCP-NEXT: ret i32 [[V:%.*]] ; ret i32 %v } @@ -82,35 +97,52 @@ define void @call_range_result() { } define internal i1 @ip_cmp_range_attribute(i32 %v) { -; CHECK-LABEL: @ip_cmp_range_attribute( -; CHECK-NEXT: ret i1 poison +; IPSCCP-LABEL: @ip_cmp_range_attribute( +; IPSCCP-NEXT: ret i1 poison +; +; SCCP-LABEL: @ip_cmp_range_attribute( +; SCCP-NEXT: [[C:%.*]] = icmp ult i32 [[V:%.*]], 10 +; SCCP-NEXT: ret i1 [[C]] ; %c = icmp ult i32 %v, 10 ret i1 %c } define i1 @ip_range_attribute(i32 range(i32 0, 10) %v) { -; CHECK-LABEL: @ip_range_attribute( -; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_attribute(i32 [[V:%.*]]) -; CHECK-NEXT: ret i1 true +; IPSCCP-LABEL: @ip_range_attribute( +; IPSCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_attribute(i32 [[V:%.*]]) +; IPSCCP-NEXT: ret i1 true +; +; SCCP-LABEL: @ip_range_attribute( +; SCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_attribute(i32 [[V:%.*]]) +; SCCP-NEXT: ret i1 [[C]] ; %c = call i1 @ip_cmp_range_attribute(i32 %v) ret i1 %c } define internal i1 @ip_cmp_range_call(i32 %v) { -; CHECK-LABEL: @ip_cmp_range_call( -; CHECK-NEXT: ret i1 poison +; IPSCCP-LABEL: @ip_cmp_range_call( +; IPSCCP-NEXT: ret i1 poison +; +; SCCP-LABEL: @ip_cmp_range_call( +; SCCP-NEXT: [[C:%.*]] = icmp ult i32 [[V:%.*]], 10 +; SCCP-NEXT: ret i1 [[C]] ; %c = icmp ult i32 %v, 10 ret i1 %c } define i1 @ip_range_call() { -; CHECK-LABEL: @ip_range_call( -; CHECK-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() -; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_call(i32 [[V]]) -; CHECK-NEXT: ret i1 true +; IPSCCP-LABEL: @ip_range_call( +; IPSCCP-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; IPSCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_call(i32 [[V]]) +; IPSCCP-NEXT: ret i1 true +; +; SCCP-LABEL: @ip_range_call( +; SCCP-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; SCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_call(i32 [[V]]) +; SCCP-NEXT: ret i1 [[C]] ; %v = call range(i32 0, 10) i32 @get_i32() %c = call i1 @ip_cmp_range_call(i32 %v) @@ -118,18 +150,27 @@ define i1 @ip_range_call() { } define internal i1 @ip_cmp_range_result(i32 %v) { -; CHECK-LABEL: @ip_cmp_range_result( -; CHECK-NEXT: ret i1 poison +; IPSCCP-LABEL: @ip_cmp_range_result( +; IPSCCP-NEXT: ret i1 poison +; +; SCCP-LABEL: @ip_cmp_range_result( +; SCCP-NEXT: [[C:%.*]] = icmp ult i32 [[V:%.*]], 10 +; SCCP-NEXT: ret i1 [[C]] ; %c = icmp ult i32 %v, 10 ret i1 %c } define i1 @ip_range_result() { -; CHECK-LABEL: @ip_range_result( -; CHECK-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() -; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_result(i32 [[V]]) -; CHECK-NEXT: ret i1 true +; IPSCCP-LABEL: @ip_range_result( +; IPSCCP-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; IPSCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_result(i32 [[V]]) +; IPSCCP-NEXT: ret i1 true +; +; SCCP-LABEL: @ip_range_result( +; SCCP-NEXT: [[V:%.*]] = call range(i32 0, 10) i32 @get_i32() +; SCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_range_result(i32 [[V]]) +; SCCP-NEXT: ret i1 [[C]] ; %v = call range(i32 0, 10) i32 @get_i32() %c = call i1 @ip_cmp_range_result(i32 %v) @@ -137,17 +178,25 @@ define i1 @ip_range_result() { } define internal i1 @ip_cmp_with_range_attribute(i32 range(i32 0, 10) %v) { -; CHECK-LABEL: @ip_cmp_with_range_attribute( -; CHECK-NEXT: ret i1 poison +; IPSCCP-LABEL: @ip_cmp_with_range_attribute( +; IPSCCP-NEXT: ret 
i1 poison +; +; SCCP-LABEL: @ip_cmp_with_range_attribute( +; SCCP-NEXT: [[C:%.*]] = icmp eq i32 [[V:%.*]], 5 +; SCCP-NEXT: ret i1 [[C]] ; %c = icmp eq i32 %v, 5 ret i1 %c } define i1 @ip_range_attribute_constant() { -; CHECK-LABEL: @ip_range_attribute_constant( -; CHECK-NEXT: [[C:%.*]] = call i1 @ip_cmp_with_range_attribute(i32 5) -; CHECK-NEXT: ret i1 true +; IPSCCP-LABEL: @ip_range_attribute_constant( +; IPSCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_with_range_attribute(i32 5) +; IPSCCP-NEXT: ret i1 true +; +; SCCP-LABEL: @ip_range_attribute_constant( +; SCCP-NEXT: [[C:%.*]] = call i1 @ip_cmp_with_range_attribute(i32 5) +; SCCP-NEXT: ret i1 [[C]] ; %c = call i1 @ip_cmp_with_range_attribute(i32 5) ret i1 %c From 03e0be90ca149e27ca608748aa9d21bbd60dea9f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 26 Aug 2024 14:55:41 +0200 Subject: [PATCH 10/65] [SCCP] Make use of argument attributes in non-ip SCCP Initialize arguments based on attributes instead of hardcoding them to overdefined. This was already properly done for ipsccp. --- llvm/lib/Transforms/Scalar/SCCP.cpp | 4 +-- llvm/test/Transforms/SCCP/range-attribute.ll | 27 ++++++-------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index ce45c58e624e48..caf9f890418e29 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -69,9 +69,9 @@ static bool runSCCP(Function &F, const DataLayout &DL, // Mark the first block of the function as being executable. Solver.markBlockExecutable(&F.front()); - // Mark all arguments to the function as being overdefined. + // Initialize arguments based on attributes. for (Argument &AI : F.args()) - Solver.markOverdefined(&AI); + Solver.trackValueOfArgument(&AI); // Solve for constants. 
bool ResolvedUndefs = true; diff --git a/llvm/test/Transforms/SCCP/range-attribute.ll b/llvm/test/Transforms/SCCP/range-attribute.ll index 4a72c7e0d70c9c..8b156e6f483ed4 100644 --- a/llvm/test/Transforms/SCCP/range-attribute.ll +++ b/llvm/test/Transforms/SCCP/range-attribute.ll @@ -6,25 +6,14 @@ declare void @use(i1) declare i32 @get_i32() define void @range_attribute(i32 range(i32 0, 10) %v) { -; IPSCCP-LABEL: @range_attribute( -; IPSCCP-NEXT: call void @use(i1 true) -; IPSCCP-NEXT: [[C2:%.*]] = icmp ult i32 [[V:%.*]], 9 -; IPSCCP-NEXT: call void @use(i1 [[C2]]) -; IPSCCP-NEXT: call void @use(i1 false) -; IPSCCP-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 -; IPSCCP-NEXT: call void @use(i1 [[C4]]) -; IPSCCP-NEXT: ret void -; -; SCCP-LABEL: @range_attribute( -; SCCP-NEXT: [[C1:%.*]] = icmp ult i32 [[V:%.*]], 10 -; SCCP-NEXT: call void @use(i1 [[C1]]) -; SCCP-NEXT: [[C2:%.*]] = icmp ult i32 [[V]], 9 -; SCCP-NEXT: call void @use(i1 [[C2]]) -; SCCP-NEXT: [[C3:%.*]] = icmp ugt i32 [[V]], 9 -; SCCP-NEXT: call void @use(i1 [[C3]]) -; SCCP-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 -; SCCP-NEXT: call void @use(i1 [[C4]]) -; SCCP-NEXT: ret void +; CHECK-LABEL: @range_attribute( +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[V:%.*]], 9 +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: [[C4:%.*]] = icmp ugt i32 [[V]], 8 +; CHECK-NEXT: call void @use(i1 [[C4]]) +; CHECK-NEXT: ret void ; %c1 = icmp ult i32 %v, 10 call void @use(i1 %c1) From b74248dae880793b0486483126b385ca0eafc896 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 26 Aug 2024 15:17:38 +0200 Subject: [PATCH 11/65] [InstCombine] Pass RPOT to InstCombiner (NFC) To make use of it in a followup change. --- llvm/include/llvm/Transforms/InstCombine/InstCombiner.h | 7 +++++-- llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 8 ++++---- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 7 +++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index ed2e7f58ca853c..c2ea88a107c32a 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -84,6 +84,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { // combining and will be updated to reflect any changes. LoopInfo *LI; + ReversePostOrderTraversal &RPOT; + bool MadeIRChange = false; /// Edges that are known to never be taken. 
@@ -98,12 +100,13 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, - ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI) + ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI, + ReversePostOrderTraversal &RPOT) : TTI(TTI), Builder(Builder), Worklist(Worklist), MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true, /*CanUseUndef*/ true, &DC), - ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), LI(LI) {} + ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), LI(LI), RPOT(RPOT) {} virtual ~InstCombiner() = default; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index a0e846c3b5a566..b3957b760b4a29 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -66,15 +66,15 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, - ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI) + ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI, + ReversePostOrderTraversal &RPOT) : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE, - BFI, BPI, PSI, DL, LI) {} + BFI, BPI, PSI, DL, LI, RPOT) {} virtual ~InstCombinerImpl() = default; /// Perform early cleanup and prepare the InstCombine worklist. - bool prepareWorklist(Function &F, - ReversePostOrderTraversal &RPOT); + bool prepareWorklist(Function &F); /// Run the combiner over the entire worklist until it is empty. /// diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c3f79fe4f901ad..8a96d1d0fb4c90 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5234,8 +5234,7 @@ class AliasScopeTracker { /// them to the worklist (this significantly speeds up instcombine on code where /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. 
-bool InstCombinerImpl::prepareWorklist( - Function &F, ReversePostOrderTraversal &RPOT) { +bool InstCombinerImpl::prepareWorklist(Function &F) { bool MadeIRChange = false; SmallPtrSet LiveBlocks; SmallVector InstrsForInstructionWorklist; @@ -5417,9 +5416,9 @@ static bool combineInstructionsOverFunction( << F.getName() << "\n"); InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, - ORE, BFI, BPI, PSI, DL, LI); + ORE, BFI, BPI, PSI, DL, LI, RPOT); IC.MaxArraySizeForCombine = MaxArraySize; - bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT); + bool MadeChangeInThisIteration = IC.prepareWorklist(F); MadeChangeInThisIteration |= IC.run(); if (!MadeChangeInThisIteration) break; From 2f0d32692e05a763c61155d5a63d2409010cf97b Mon Sep 17 00:00:00 2001 From: Shao-Ce SUN Date: Mon, 26 Aug 2024 21:23:05 +0800 Subject: [PATCH 12/65] [NFC][VPlan] Trim extra spaces in `VPDerivedIVRecipe::print` during debugging (#106041) before: ``` EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8> vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1> vp<%5> = SCALAR-STEPS vp<%4>, ir<-1> ``` after: ``` EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8> vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1> vp<%5> = SCALAR-STEPS vp<%4>, ir<-1> ``` --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fe1325f4163004..53b28a692059f6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1455,7 +1455,7 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent; printAsOperand(O, SlotTracker); - O << Indent << "= DERIVED-IV "; + O << " = DERIVED-IV "; getStartValue()->printAsOperand(O, SlotTracker); O << " + "; getOperand(1)->printAsOperand(O, SlotTracker); From 6f092e501b715332263987f86e9a0f26a50524cb Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Mon, 26 Aug 2024 15:23:39 +0200 Subject: [PATCH 13/65] [MLIR][Transforms] Update block arg locations during inlining (#106064) This commit changes the inlining to also update the locations of block arguments. Not updating these locations leads to LLVM IR verification issues when exporting converted block arguments to phi nodes. This lack of location update was not visible due to ignoring the argument locations until recently. Relevant change: https://github.com/llvm/llvm-project/pull/105534 --- mlir/lib/Transforms/Utils/InliningUtils.cpp | 37 +++++++++++++++------ mlir/test/Transforms/inlining.mlir | 4 +-- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Transforms/Utils/InliningUtils.cpp b/mlir/lib/Transforms/Utils/InliningUtils.cpp index ba146920fae2e9..0db097d14cd3c7 100644 --- a/mlir/lib/Transforms/Utils/InliningUtils.cpp +++ b/mlir/lib/Transforms/Utils/InliningUtils.cpp @@ -25,22 +25,37 @@ using namespace mlir; -/// Remap locations from the inlined blocks with CallSiteLoc locations with the -/// provided caller location. +/// Remap all locations reachable from the inlined blocks with CallSiteLoc +/// locations with the provided caller location. 
static void remapInlinedLocations(iterator_range inlinedBlocks, Location callerLoc) { - DenseMap mappedLocations; - auto remapOpLoc = [&](Operation *op) { - auto it = mappedLocations.find(op->getLoc()); - if (it == mappedLocations.end()) { - auto newLoc = CallSiteLoc::get(op->getLoc(), callerLoc); - it = mappedLocations.try_emplace(op->getLoc(), newLoc).first; + DenseMap mappedLocations; + auto remapLoc = [&](Location loc) { + auto [it, inserted] = mappedLocations.try_emplace(loc); + // Only query the attribute uniquer once per callsite attribute. + if (inserted) { + auto newLoc = CallSiteLoc::get(loc, callerLoc); + it->getSecond() = newLoc; } - op->setLoc(it->second); + return it->second; }; - for (auto &block : inlinedBlocks) - block.walk(remapOpLoc); + + AttrTypeReplacer attrReplacer; + attrReplacer.addReplacement( + [&](LocationAttr loc) -> std::pair { + return {remapLoc(loc), WalkResult::skip()}; + }); + + for (Block &block : inlinedBlocks) { + for (BlockArgument &arg : block.getArguments()) + if (LocationAttr newLoc = remapLoc(arg.getLoc())) + arg.setLoc(newLoc); + + for (Operation &op : block) + attrReplacer.recursivelyReplaceElementsIn(&op, /*replaceAttrs=*/false, + /*replaceLocs=*/true); + } } static void remapInlinedOperands(iterator_range inlinedBlocks, diff --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir index 2a08e625ba79e2..79a2936b104fa1 100644 --- a/mlir/test/Transforms/inlining.mlir +++ b/mlir/test/Transforms/inlining.mlir @@ -215,9 +215,9 @@ func.func @func_with_block_args_location(%arg0 : i32) { // INLINE-LOC-LABEL: func @func_with_block_args_location_callee1 // INLINE-LOC: cf.br -// INLINE-LOC: ^bb{{[0-9]+}}(%{{.*}}: i32 loc("foo") +// INLINE-LOC: ^bb{{[0-9]+}}(%{{.*}}: i32 loc(callsite("foo" at "bar")) func.func @func_with_block_args_location_callee1(%arg0 : i32) { - call @func_with_block_args_location(%arg0) : (i32) -> () + call @func_with_block_args_location(%arg0) : (i32) -> () loc("bar") return } From ca95bee649724a6092989076322daa501a0a6594 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Mon, 26 Aug 2024 06:39:24 -0700 Subject: [PATCH 14/65] [compiler-rt][rtsan] Introduce first end to end RTsan lit tests, enable instrumented unit tests (#105732) --- compiler-rt/lib/rtsan/tests/CMakeLists.txt | 15 +++++------ .../lib/rtsan/tests/rtsan_test_functional.cpp | 12 ++++++--- .../rtsan/tests/rtsan_test_interceptors.cpp | 2 +- compiler-rt/test/rtsan/CMakeLists.txt | 11 -------- compiler-rt/test/rtsan/basic.cpp | 21 +++++++++++++++ compiler-rt/test/rtsan/inactive.cpp | 26 +++++++++++++++++++ .../test/sanitizer_common/lit.common.cfg.py | 3 +++ 7 files changed, 67 insertions(+), 23 deletions(-) create mode 100644 compiler-rt/test/rtsan/basic.cpp create mode 100644 compiler-rt/test/rtsan/inactive.cpp diff --git a/compiler-rt/lib/rtsan/tests/CMakeLists.txt b/compiler-rt/lib/rtsan/tests/CMakeLists.txt index 3b783c90c26585..0320bbad592186 100644 --- a/compiler-rt/lib/rtsan/tests/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/tests/CMakeLists.txt @@ -60,14 +60,13 @@ endif() foreach(arch ${RTSAN_TEST_ARCH}) set(RtsanTestObjects) - # TODO: Re-enable once -fsanitize=realtime exists in clang driver - #generate_compiler_rt_tests(RtsanTestObjects - # RtsanUnitTests "Rtsan-${arch}-Test" ${arch} - # COMPILE_DEPS ${RTSAN_UNITTEST_HEADERS} - # SOURCES ${RTSAN_INST_TEST_SOURCES} ${COMPILER_RT_GOOGLETEST_SOURCES} - # DEPS rtsan - # CFLAGS ${RTSAN_UNITTEST_CFLAGS} -fsanitize=realtime - # LINK_FLAGS ${RTSAN_UNITTEST_LINK_FLAGS} -fsanitize=realtime) + 
generate_compiler_rt_tests(RtsanTestObjects + RtsanUnitTests "Rtsan-${arch}-Test" ${arch} + COMPILE_DEPS ${RTSAN_UNITTEST_HEADERS} + SOURCES ${RTSAN_INST_TEST_SOURCES} ${COMPILER_RT_GOOGLETEST_SOURCES} + DEPS rtsan + CFLAGS ${RTSAN_UNITTEST_CFLAGS} -fsanitize=realtime + LINK_FLAGS ${RTSAN_UNITTEST_LINK_FLAGS} -fsanitize=realtime) set(RTSAN_TEST_RUNTIME RTRtsanTest.${arch}) if(APPLE) diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp index 97afb1eefb6401..6e7ab016a4c6b2 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp @@ -145,7 +145,7 @@ TEST(TestRtsan, CopyingALambdaWithLargeCaptureDiesWhenRealtime) { auto lambda = [lots_of_data]() mutable { // Stop everything getting optimised out lots_of_data[3] = 0.25f; - EXPECT_EQ(16, lots_of_data.size()); + EXPECT_EQ(16u, lots_of_data.size()); EXPECT_EQ(0.25f, lots_of_data[3]); }; auto Func = [&]() { InvokeStdFunction(lambda); }; @@ -156,11 +156,17 @@ TEST(TestRtsan, CopyingALambdaWithLargeCaptureDiesWhenRealtime) { TEST(TestRtsan, AccessingALargeAtomicVariableDiesWhenRealtime) { std::atomic small_atomic{0.0f}; ASSERT_TRUE(small_atomic.is_lock_free()); - RealtimeInvoke([&small_atomic]() { float x = small_atomic.load(); }); + RealtimeInvoke([&small_atomic]() { + float x = small_atomic.load(); + return x; + }); std::atomic> large_atomic; ASSERT_FALSE(large_atomic.is_lock_free()); - auto Func = [&]() { auto x = large_atomic.load(); }; + auto Func = [&]() { + std::array x = large_atomic.load(); + return x; + }; ExpectRealtimeDeath(Func); ExpectNonRealtimeSurvival(Func); } diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp index 8861104068c8e9..47c07b3e47abd7 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp @@ -321,7 +321,7 @@ TEST(TestRtsanInterceptors, PthreadCreateDiesWhenRealtime) { auto Func = []() { pthread_t thread{}; const pthread_attr_t attr{}; - struct thread_info *thread_info; + struct thread_info *thread_info{}; pthread_create(&thread, &attr, &FakeThreadEntryPoint, thread_info); }; ExpectRealtimeDeath(Func, "pthread_create"); diff --git a/compiler-rt/test/rtsan/CMakeLists.txt b/compiler-rt/test/rtsan/CMakeLists.txt index e1f9eb39408dc1..59fc5a29703fea 100644 --- a/compiler-rt/test/rtsan/CMakeLists.txt +++ b/compiler-rt/test/rtsan/CMakeLists.txt @@ -1,14 +1,3 @@ - - - - -###### -# TODO: Full lit tests coming in a future review when we introduce the codegen -###### - - - - set(RTSAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(RTSAN_TESTSUITES) diff --git a/compiler-rt/test/rtsan/basic.cpp b/compiler-rt/test/rtsan/basic.cpp new file mode 100644 index 00000000000000..ec7382cb0ecaff --- /dev/null +++ b/compiler-rt/test/rtsan/basic.cpp @@ -0,0 +1,21 @@ +// RUN: %clangxx -fsanitize=realtime %s -o %t +// RUN: not %run %t 2>&1 | FileCheck %s +// UNSUPPORTED: ios + +// Intent: Ensure that an intercepted call in a [[clang::nonblocking]] function +// is flagged as an error. Basic smoke test. 
+ +#include +#include + +void violation() [[clang::nonblocking]] { + void *ptr = malloc(2); + printf("ptr: %p\n", ptr); // ensure we don't optimize out the malloc +} + +int main() { + violation(); + return 0; + // CHECK: {{.*Real-time violation.*}} + // CHECK: {{.*malloc*}} +} diff --git a/compiler-rt/test/rtsan/inactive.cpp b/compiler-rt/test/rtsan/inactive.cpp new file mode 100644 index 00000000000000..69edc63a4cfd41 --- /dev/null +++ b/compiler-rt/test/rtsan/inactive.cpp @@ -0,0 +1,26 @@ +// RUN: %clangxx %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s +// UNSUPPORTED: ios + +// Intent: Ensure [[clang::nonblocking]] has no impact if -fsanitize=realtime is not used + +#include +#include + +// In this test, we don't use the -fsanitize=realtime flag, so nothing +// should happen here +void violation() [[clang::nonblocking]] { + void *ptr = malloc(2); + printf("ptr: %p\n", ptr); // ensure we don't optimize out the malloc +} + +int main() { + printf("Starting run\n"); + violation(); + printf("No violations ended the program\n"); + return 0; + // CHECK: {{.*Starting run.*}} + // CHECK NOT: {{.*Real-time violation.*}} + // CHECK NOT: {{.*malloc*}} + // CHECK: {{.*No violations ended the program.*}} +} diff --git a/compiler-rt/test/sanitizer_common/lit.common.cfg.py b/compiler-rt/test/sanitizer_common/lit.common.cfg.py index 04af4816eb6e78..5406e8838f2fcf 100644 --- a/compiler-rt/test/sanitizer_common/lit.common.cfg.py +++ b/compiler-rt/test/sanitizer_common/lit.common.cfg.py @@ -18,6 +18,9 @@ tool_options = "HWASAN_OPTIONS" if not config.has_lld: config.unsupported = True +elif config.tool_name == "rtsan": + tool_cflags = ["-fsanitize=realtime"] + tool_options = "RTSAN_OPTIONS" elif config.tool_name == "tsan": tool_cflags = ["-fsanitize=thread"] tool_options = "TSAN_OPTIONS" From 1c48c9cc43dbbbd1751e15d199b4d7d4fc52d828 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 26 Aug 2024 09:58:19 -0400 Subject: [PATCH 15/65] [libc++] Implement P2985R0: std::is_virtual_base_of (#105847) This trait is implemented in C++26 conditionally on the compiler supporting the __builtin_is_virtual_base_of intrinsic. I believe only tip-of-trunk Clang currently implements that builtin. 
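For illustration, a minimal usage sketch of the new trait (not part of this patch; it assumes a compiler and standard library with P2985R0 support, and the class names are made up for the example):

```cpp
#include <type_traits>

struct Base {};
struct NonVirtualDerived : Base {};          // ordinary inheritance
struct VirtualDerived : virtual Base {};     // virtual inheritance

// Only virtual base classes are reported by the new trait.
static_assert(!std::is_virtual_base_of_v<Base, NonVirtualDerived>);
static_assert(std::is_virtual_base_of_v<Base, VirtualDerived>);
static_assert(std::is_virtual_base_of<Base, VirtualDerived>::value);
```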
Closes #105432 --- libcxx/docs/FeatureTestMacroTable.rst | 2 +- libcxx/docs/ReleaseNotes/20.rst | 2 +- libcxx/docs/Status/Cxx2cPapers.csv | 2 +- libcxx/include/__type_traits/is_base_of.h | 12 ++ libcxx/include/type_traits | 3 + libcxx/include/version | 4 +- .../type_traits.version.compile.pass.cpp | 6 +- .../version.version.compile.pass.cpp | 6 +- .../meta/meta.rel/is_virtual_base_of.pass.cpp | 166 ++++++++++++++++++ .../generate_feature_test_macro_components.py | 3 +- 10 files changed, 195 insertions(+), 11 deletions(-) create mode 100644 libcxx/test/std/utilities/meta/meta.rel/is_virtual_base_of.pass.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index a1506e115fe70f..f6d3142c1e2d3e 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -442,7 +442,7 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_inplace_vector`` *unimplemented* ---------------------------------------------------------- ----------------- - ``__cpp_lib_is_virtual_base_of`` *unimplemented* + ``__cpp_lib_is_virtual_base_of`` ``202406L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_is_within_lifetime`` *unimplemented* ---------------------------------------------------------- ----------------- diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index fe9f4c1973cdb4..bc28f380945bc3 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -38,7 +38,7 @@ What's New in Libc++ 20.0.0? Implemented Papers ------------------ -- TODO +- P2985R0: A type trait for detecting virtual base classes (`Github `__) Improvements and New Features diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index dd62bcc2555ffc..d95cb11f483c00 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -68,7 +68,7 @@ "`P2389R2 `__","``dextents`` Index Type Parameter","2024-06 (St. Louis)","|Complete|","19.0","" "`P3168R2 `__","Give ``std::optional`` Range Support","2024-06 (St. Louis)","","","|ranges|" "`P3217R0 `__","Adjoints to 'Enabling list-initialization for algorithms': find_last","2024-06 (St. Louis)","","","" -"`P2985R0 `__","A type trait for detecting virtual base classes","2024-06 (St. Louis)","","","" +"`P2985R0 `__","A type trait for detecting virtual base classes","2024-06 (St. Louis)","|Complete|","20.0","" "`P0843R14 `__","``inplace_vector``","2024-06 (St. Louis)","","","" "`P3235R3 `__","``std::print`` more types faster with less memory","2024-06 (St. Louis)","","","|format| |DR|" "`P2968R2 `__","Make ``std::ignore`` a first-class object","2024-06 (St. 
Louis)","|Complete|","19.0","" diff --git a/libcxx/include/__type_traits/is_base_of.h b/libcxx/include/__type_traits/is_base_of.h index 090abeeb54dccb..488b63719eb600 100644 --- a/libcxx/include/__type_traits/is_base_of.h +++ b/libcxx/include/__type_traits/is_base_of.h @@ -26,6 +26,18 @@ template inline constexpr bool is_base_of_v = __is_base_of(_Bp, _Dp); #endif +#if _LIBCPP_STD_VER >= 26 +# if __has_builtin(__builtin_is_virtual_base_of) + +template +struct _LIBCPP_TEMPLATE_VIS is_virtual_base_of : public bool_constant<__builtin_is_virtual_base_of(_Base, _Derived)> {}; + +template +inline constexpr bool is_virtual_base_of_v = __builtin_is_virtual_base_of(_Base, _Derived); + +# endif +#endif + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___TYPE_TRAITS_IS_BASE_OF_H diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 7f231cd09df510..5937d4fdc9e1a7 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -144,6 +144,7 @@ namespace std // Relationships between types: template struct is_same; template struct is_base_of; + template struct is_virtual_base_of; // C++26 template struct is_convertible; template struct is_nothrow_convertible; // C++20 @@ -391,6 +392,8 @@ namespace std = is_same::value; // C++17 template inline constexpr bool is_base_of_v = is_base_of::value; // C++17 + template inline constexpr bool is_virtual_base_of_v + = is_virtual_base_of::value; // C++26 template inline constexpr bool is_convertible_v = is_convertible::value; // C++17 template inline constexpr bool is_invocable_v diff --git a/libcxx/include/version b/libcxx/include/version index fe64343eafbc9c..a19be2d294afd3 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -531,7 +531,9 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_generate_random 202403L // # define __cpp_lib_hazard_pointer 202306L // # define __cpp_lib_inplace_vector 202406L -// # define __cpp_lib_is_virtual_base_of 202406L +# if __has_builtin(__builtin_is_virtual_base_of) +# define __cpp_lib_is_virtual_base_of 202406L +# endif // # define __cpp_lib_is_within_lifetime 202306L // # define __cpp_lib_linalg 202311L # undef __cpp_lib_mdspan diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp index bb69ca7368aafa..1cbf2699a95bcc 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/type_traits.version.compile.pass.cpp @@ -857,16 +857,16 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++26" # endif -# if !defined(_LIBCPP_VERSION) +# if __has_builtin(__builtin_is_virtual_base_of) # ifndef __cpp_lib_is_virtual_base_of # error "__cpp_lib_is_virtual_base_of should be defined in c++26" # endif # if __cpp_lib_is_virtual_base_of != 202406L # error "__cpp_lib_is_virtual_base_of should have the value 202406L in c++26" # endif -# else // _LIBCPP_VERSION +# else # ifdef __cpp_lib_is_virtual_base_of -# error "__cpp_lib_is_virtual_base_of should not be defined because it is unimplemented in libc++!" +# error "__cpp_lib_is_virtual_base_of should not be defined when the requirement '__has_builtin(__builtin_is_virtual_base_of)' is not met!" 
# endif # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index b8bad696f1bae0..bd2959d55dc20d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -7172,16 +7172,16 @@ # error "__cpp_lib_is_swappable should have the value 201603L in c++26" # endif -# if !defined(_LIBCPP_VERSION) +# if __has_builtin(__builtin_is_virtual_base_of) # ifndef __cpp_lib_is_virtual_base_of # error "__cpp_lib_is_virtual_base_of should be defined in c++26" # endif # if __cpp_lib_is_virtual_base_of != 202406L # error "__cpp_lib_is_virtual_base_of should have the value 202406L in c++26" # endif -# else // _LIBCPP_VERSION +# else # ifdef __cpp_lib_is_virtual_base_of -# error "__cpp_lib_is_virtual_base_of should not be defined because it is unimplemented in libc++!" +# error "__cpp_lib_is_virtual_base_of should not be defined when the requirement '__has_builtin(__builtin_is_virtual_base_of)' is not met!" # endif # endif diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_virtual_base_of.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_virtual_base_of.pass.cpp new file mode 100644 index 00000000000000..6b34d56e2c6f45 --- /dev/null +++ b/libcxx/test/std/utilities/meta/meta.rel/is_virtual_base_of.pass.cpp @@ -0,0 +1,166 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23 + +// These compilers don't support __builtin_is_virtual_base_of yet. +// UNSUPPORTED: clang-17, clang-18, clang-19, gcc-14, apple-clang-16, apple-clang-17 + +// + +// std::is_virtual_base_of + +#include +#include + +template +void test() { + // Test the type of the variables + { + static_assert(std::is_same_v::value)>); + static_assert(std::is_same_v)>); + } + + // Test their value + { + static_assert(std::is_virtual_base_of::value == expected); + static_assert(std::is_virtual_base_of::value == expected); + static_assert(std::is_virtual_base_of::value == expected); + static_assert(std::is_virtual_base_of::value == expected); + + static_assert(std::is_virtual_base_of_v == expected); + static_assert(std::is_virtual_base_of_v == expected); + static_assert(std::is_virtual_base_of_v == expected); + static_assert(std::is_virtual_base_of_v == expected); + } + + // Check the relationship with is_base_of. If it's not a base of, it can't be a virtual base of. + { static_assert(!std::is_base_of_v ? 
!std::is_virtual_base_of_v : true); } + + // Make sure they can be referenced at runtime + { + bool const& a = std::is_virtual_base_of::value; + bool const& b = std::is_virtual_base_of_v; + assert(a == expected); + assert(b == expected); + } +} + +struct Incomplete; +struct Unrelated {}; +union IncompleteUnion; +union Union { + int i; + float f; +}; + +class Base {}; +class Derived : Base {}; +class Derived2 : Base {}; +class Derived2a : Derived {}; +class Derived2b : Derived {}; +class Derived3Virtual : virtual Derived2a, virtual Derived2b {}; + +struct DerivedTransitiveViaNonVirtual : Derived3Virtual {}; +struct DerivedTransitiveViaVirtual : virtual Derived3Virtual {}; + +template +struct CrazyDerived : T {}; +template +struct CrazyDerivedVirtual : virtual T {}; + +struct DerivedPrivate : private virtual Base {}; +struct DerivedProtected : protected virtual Base {}; +struct DerivedPrivatePrivate : private DerivedPrivate {}; +struct DerivedPrivateProtected : private DerivedProtected {}; +struct DerivedProtectedPrivate : protected DerivedProtected {}; +struct DerivedProtectedProtected : protected DerivedProtected {}; +struct DerivedTransitivePrivate : private Derived, private Derived2 {}; + +int main(int, char**) { + // Test with non-virtual inheritance + { + test(); + test(); + test(); + test(); + test(); + test(); + + // Derived must be a complete type if Base and Derived are non-union class types + // test(); + } + + // Test with virtual inheritance + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test>(); + test, Base>(); + test>(); + test, Base>(); + } + + // Test unrelated types + { + test(); + test(); + test(); + test(); + test(); + test(); + } + + // Test scalar types + { + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + + test(); + test(); + test(); + } + + // Test unions + { + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + test(); + } + + return 0; +} diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index b041b08f02aac5..f402d4de2275e5 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -784,7 +784,8 @@ def add_version_header(tc): "c++26": 202406 # P2985R0 A type trait for detecting virtual base classes }, "headers": ["type_traits"], - "unimplemented": True, + "test_suite_guard": "__has_builtin(__builtin_is_virtual_base_of)", + "libcxx_guard": "__has_builtin(__builtin_is_virtual_base_of)", }, { "name": "__cpp_lib_is_within_lifetime", From b8f134faba3a41f47d2d05125118ea1acf512cb3 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 26 Aug 2024 09:00:10 -0500 Subject: [PATCH 16/65] [libc] Implement 'vfscanf' and 'vscanf' (#105293) Summary: These are simply forwarding the vlist to the existing helper. 
--- libc/config/linux/aarch64/entrypoints.txt | 2 + libc/config/linux/riscv/entrypoints.txt | 2 + libc/config/linux/x86_64/entrypoints.txt | 2 + libc/newhdrgen/yaml/stdio.yaml | 15 ++++ libc/spec/stdc.td | 13 +++ libc/src/stdio/CMakeLists.txt | 20 +++++ libc/src/stdio/vfscanf.cpp | 34 ++++++++ libc/src/stdio/vfscanf.h | 24 ++++++ libc/src/stdio/vscanf.cpp | 40 +++++++++ libc/src/stdio/vscanf.h | 23 ++++++ libc/test/src/stdio/CMakeLists.txt | 14 ++++ libc/test/src/stdio/vfscanf_test.cpp | 98 +++++++++++++++++++++++ 12 files changed, 287 insertions(+) create mode 100644 libc/src/stdio/vfscanf.cpp create mode 100644 libc/src/stdio/vfscanf.h create mode 100644 libc/src/stdio/vscanf.cpp create mode 100644 libc/src/stdio/vscanf.h create mode 100644 libc/test/src/stdio/vfscanf_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index d22bd1153598eb..60aa7f5ccb319a 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -211,10 +211,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fileno libc.src.stdio.fprintf libc.src.stdio.fscanf + libc.src.stdio.vfscanf libc.src.stdio.printf libc.src.stdio.remove libc.src.stdio.rename libc.src.stdio.scanf + libc.src.stdio.vscanf libc.src.stdio.snprintf libc.src.stdio.sprintf libc.src.stdio.asprintf diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 1a647737ec455a..9a2746dcb86f87 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -210,10 +210,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fileno libc.src.stdio.fprintf libc.src.stdio.fscanf + libc.src.stdio.vfscanf libc.src.stdio.printf libc.src.stdio.remove libc.src.stdio.rename libc.src.stdio.scanf + libc.src.stdio.vscanf libc.src.stdio.snprintf libc.src.stdio.sprintf libc.src.stdio.asprintf diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index bac1e3cfa85da7..141dc70463d64a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -210,10 +210,12 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.fileno libc.src.stdio.fprintf libc.src.stdio.fscanf + libc.src.stdio.vfscanf libc.src.stdio.printf libc.src.stdio.remove libc.src.stdio.rename libc.src.stdio.scanf + libc.src.stdio.vscanf libc.src.stdio.snprintf libc.src.stdio.sprintf libc.src.stdio.asprintf diff --git a/libc/newhdrgen/yaml/stdio.yaml b/libc/newhdrgen/yaml/stdio.yaml index 43438699b58409..fd116bbc00895d 100644 --- a/libc/newhdrgen/yaml/stdio.yaml +++ b/libc/newhdrgen/yaml/stdio.yaml @@ -178,6 +178,14 @@ functions: - type: FILE *__restrict - type: const char *__restrict - type: '...' + - name: vfscanf + standards: + - stdc + return_type: int + arguments: + - type: FILE *__restrict + - type: const char *__restrict + - type: va_list - name: fseek standards: - stdc @@ -284,6 +292,13 @@ functions: arguments: - type: const char *__restrict - type: '...' 
+ - name: vscanf + standards: + - stdc + return_type: int + arguments: + - type: const char *__restrict + - type: va_list - name: setbuf standards: - stdc diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 6d8be9f8e4016d..026cc72b458a77 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -1042,6 +1042,12 @@ def StdC : StandardSpec<"stdc"> { [ArgSpec, ArgSpec] >, + FunctionSpec< + "vscanf", + RetValSpec, + [ArgSpec, + ArgSpec] + >, FunctionSpec< "fscanf", RetValSpec, @@ -1049,6 +1055,13 @@ def StdC : StandardSpec<"stdc"> { ArgSpec, ArgSpec] >, + FunctionSpec< + "vfscanf", + RetValSpec, + [ArgSpec, + ArgSpec, + ArgSpec] + >, FunctionSpec< "sprintf", RetValSpec, diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 372b8fc8192455..b9bc904471df9a 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -143,6 +143,16 @@ add_entrypoint_object( ${scanf_deps} ) +add_entrypoint_object( + vfscanf + SRCS + vfscanf.cpp + HDRS + vfscanf.h + DEPENDS + ${scanf_deps} +) + add_entrypoint_object( scanf SRCS @@ -153,6 +163,16 @@ add_entrypoint_object( ${scanf_deps} ) +add_entrypoint_object( + vscanf + SRCS + vscanf.cpp + HDRS + vscanf.h + DEPENDS + ${scanf_deps} +) + add_entrypoint_object( sprintf SRCS diff --git a/libc/src/stdio/vfscanf.cpp b/libc/src/stdio/vfscanf.cpp new file mode 100644 index 00000000000000..220576522d0fdb --- /dev/null +++ b/libc/src/stdio/vfscanf.cpp @@ -0,0 +1,34 @@ +//===-- Implementation of vfscanf -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vfscanf.h" + +#include "src/__support/File/file.h" +#include "src/__support/arg_list.h" +#include "src/__support/macros/config.h" +#include "src/stdio/scanf_core/vfscanf_internal.h" + +#include "hdr/types/FILE.h" +#include + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, vfscanf, + (::FILE *__restrict stream, const char *__restrict format, + va_list vlist)) { + internal::ArgList args(vlist); // This holder class allows for easier copying + // and pointer semantics, as well as handling + // destruction automatically. + va_end(vlist); + int ret_val = scanf_core::vfscanf_internal(stream, format, args); + // This is done to avoid including stdio.h in the internals. On most systems + // EOF is -1, so this will be transformed into just "return ret_val". + return (ret_val == -1) ? EOF : ret_val; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vfscanf.h b/libc/src/stdio/vfscanf.h new file mode 100644 index 00000000000000..1a0a12d9eb4cd3 --- /dev/null +++ b/libc/src/stdio/vfscanf.h @@ -0,0 +1,24 @@ +//===-- Implementation header of vfscanf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_VFSCANF_H +#define LLVM_LIBC_SRC_STDIO_VFSCANF_H + +#include "hdr/types/FILE.h" +#include "src/__support/macros/config.h" + +#include + +namespace LIBC_NAMESPACE_DECL { + +int vfscanf(::FILE *__restrict stream, const char *__restrict format, + va_list vlist); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDIO_VFSCANF_H diff --git a/libc/src/stdio/vscanf.cpp b/libc/src/stdio/vscanf.cpp new file mode 100644 index 00000000000000..64f5cc1d6962a1 --- /dev/null +++ b/libc/src/stdio/vscanf.cpp @@ -0,0 +1,40 @@ +//===-- Implementation of vscanf --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vscanf.h" + +#include "src/__support/File/file.h" +#include "src/__support/arg_list.h" +#include "src/__support/macros/config.h" +#include "src/stdio/scanf_core/vfscanf_internal.h" + +#include "hdr/types/FILE.h" +#include + +#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE +#define SCANF_STDIN LIBC_NAMESPACE::stdin +#else // LIBC_COPT_STDIO_USE_SYSTEM_FILE +#define SCANF_STDIN ::stdin +#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, vscanf, + (const char *__restrict format, va_list vlist)) { + internal::ArgList args(vlist); // This holder class allows for easier copying + // and pointer semantics, as well as handling + // destruction automatically. + va_end(vlist); + int ret_val = scanf_core::vfscanf_internal( + reinterpret_cast<::FILE *>(SCANF_STDIN), format, args); + // This is done to avoid including stdio.h in the internals. On most systems + // EOF is -1, so this will be transformed into just "return ret_val". + return (ret_val == -1) ? EOF : ret_val; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vscanf.h b/libc/src/stdio/vscanf.h new file mode 100644 index 00000000000000..5c59b91128ea32 --- /dev/null +++ b/libc/src/stdio/vscanf.h @@ -0,0 +1,23 @@ +//===-- Implementation header of vscanf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_VSCANF_H +#define LLVM_LIBC_SRC_STDIO_VSCANF_H + +#include "hdr/types/FILE.h" +#include "src/__support/macros/config.h" + +#include + +namespace LIBC_NAMESPACE_DECL { + +int vscanf(const char *__restrict format, va_list vlist); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDIO_VSCANF_H diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 8b05b928a02695..d7b88186b5704a 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -286,6 +286,20 @@ add_libc_test( ${use_system_file} ) +add_libc_test( + vfscanf_test + SUITE + libc_stdio_unittests + SRCS + vfscanf_test.cpp + DEPENDS + libc.src.stdio.vfscanf + ${fscanf_test_deps} + libc.src.__support.CPP.string_view + COMPILE_OPTIONS + ${use_system_file} +) + if(LIBC_CONF_SCANF_DISABLE_FLOAT) list(APPEND sscanf_test_copts "-DLIBC_COPT_SCANF_DISABLE_FLOAT") endif() diff --git a/libc/test/src/stdio/vfscanf_test.cpp b/libc/test/src/stdio/vfscanf_test.cpp new file mode 100644 index 00000000000000..7a9cbf7f123880 --- /dev/null +++ b/libc/test/src/stdio/vfscanf_test.cpp @@ -0,0 +1,98 @@ +//===-- Unittests for vfscanf ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/string_view.h" + +#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE +#include "src/stdio/fclose.h" +#include "src/stdio/ferror.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fwrite.h" +#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE + +#include "src/stdio/vfscanf.h" + +#include "test/UnitTest/Test.h" + +#include + +namespace scanf_test { +#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE +using LIBC_NAMESPACE::fclose; +using LIBC_NAMESPACE::ferror; +using LIBC_NAMESPACE::fopen; +using LIBC_NAMESPACE::fwrite; +#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE) +using ::fclose; +using ::ferror; +using ::fopen; +using ::fwrite; +#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE +} // namespace scanf_test + +static int call_vfscanf(::FILE *stream, const char *__restrict format, ...) 
{ + va_list vlist; + va_start(vlist, format); + int ret = LIBC_NAMESPACE::vfscanf(stream, format, vlist); + va_end(vlist); + return ret; +} + +TEST(LlvmLibcFScanfTest, WriteToFile) { + const char *FILENAME = "fscanf_output.test"; + auto FILE_PATH = libc_make_test_file_path(FILENAME); + ::FILE *file = scanf_test::fopen(FILE_PATH, "w"); + ASSERT_FALSE(file == nullptr); + + int read; + + constexpr char simple[] = "A simple string with no conversions.\n"; + + ASSERT_EQ(sizeof(simple) - 1, + scanf_test::fwrite(simple, 1, sizeof(simple) - 1, file)); + + constexpr char numbers[] = "1234567890\n"; + + ASSERT_EQ(sizeof(numbers) - 1, + scanf_test::fwrite(numbers, 1, sizeof(numbers) - 1, file)); + + constexpr char numbers_and_more[] = "1234 and more\n"; + + ASSERT_EQ(sizeof(numbers_and_more) - 1, + scanf_test::fwrite(numbers_and_more, 1, + sizeof(numbers_and_more) - 1, file)); + + read = call_vfscanf(file, "Reading from a write-only file should fail."); + EXPECT_LT(read, 0); + + ASSERT_EQ(0, scanf_test::fclose(file)); + + file = scanf_test::fopen(FILE_PATH, "r"); + ASSERT_FALSE(file == nullptr); + + char data[50]; + read = call_vfscanf(file, "%[A-Za-z .\n]", data); + ASSERT_EQ(read, 1); + ASSERT_STREQ(simple, data); + + read = call_vfscanf(file, "%s", data); + ASSERT_EQ(read, 1); + ASSERT_EQ(LIBC_NAMESPACE::cpp::string_view(numbers, 10), + LIBC_NAMESPACE::cpp::string_view(data)); + + // The format string starts with a space to handle the fact that the %s leaves + // a trailing \n and %c doesn't strip leading whitespace. + read = call_vfscanf(file, " %50c", data); + ASSERT_EQ(read, 1); + ASSERT_EQ( + LIBC_NAMESPACE::cpp::string_view(numbers_and_more), + LIBC_NAMESPACE::cpp::string_view(data, sizeof(numbers_and_more) - 1)); + + ASSERT_EQ(scanf_test::ferror(file), 0); + ASSERT_EQ(scanf_test::fclose(file), 0); +} From 499e13514aaf2efdcd85520ade791ed635502adb Mon Sep 17 00:00:00 2001 From: "S. B. Tam" Date: Mon, 26 Aug 2024 22:08:52 +0800 Subject: [PATCH 17/65] [libc++][test] Do not test Clang bug in `is_constructible.pass.cpp` (#105964) A comment in `is_constructible.pass.cpp` suggests that Clang is non-conforming in accepting construction of `const int&` from `ExplicitTo`. This PR changes the test to expect the standard-conforming behavior, which makes the test pass on MSVC. --- .../meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp index 578efb90f7f1ab..9a5efe7b5fe32f 100644 --- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp @@ -228,8 +228,8 @@ int main(int, char**) // But the rvalue to lvalue reference binding isn't allowed according to // [over.match.ref] despite Clang accepting it. test_is_constructible>(); -#ifndef TEST_COMPILER_GCC - test_is_constructible>(); +#ifndef TEST_COMPILER_CLANG + test_is_not_constructible>(); #endif static_assert(std::is_constructible>::value, ""); From b9d3da8c8d277a7fc2223c659122bb377a0e54e0 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 26 Aug 2024 04:31:44 -0700 Subject: [PATCH 18/65] [SLP]Fix PR105904: the root node might be a gather node without user for reductions. Before checking the user components of the gather/buildvector nodes, need to check if the node has users at all. 
Root nodes might not have users, if it is a node for the reduction. Fixes https://github.com/llvm/llvm-project/issues/105904 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 3 +- .../RISCV/gather-node-with-no-users.ll | 74 +++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 949579772b94d5..def73e8d0c0db7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3011,7 +3011,8 @@ class BoUpSLP { } bool isOperandGatherNode(const EdgeInfo &UserEI) const { - return isGather() && UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx && + return isGather() && (Idx > 0 || !UserTreeIndices.empty()) && + UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx && UserTreeIndices.front().UserTE == UserEI.UserTE; } diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll new file mode 100644 index 00000000000000..f07b6bbe8d6621 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvl512b < %s | FileCheck %s + +define void @test(ptr %c) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[C]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> , <8 x i8> poison) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> , <8 x i8> poison) +; CHECK-NEXT: br label %[[FOR_COND:.*]] +; CHECK: [[FOR_COND]]: +; CHECK-NEXT: [[A_PROMOTED2226:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FOR_COND]] ] +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP5]], i64 8) +; CHECK-NEXT: [[TMP8]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP7]]) +; CHECK-NEXT: br label %[[FOR_COND]] +; +entry: + %arrayidx8.5.3 = getelementptr i8, ptr %c, i64 222 + %0 = load i8, ptr %arrayidx8.5.3, align 1 + %arrayidx8.7.3 = getelementptr i8, ptr %c, i64 228 + %1 = load i8, ptr %arrayidx8.7.3, align 1 + %arrayidx8.434 = getelementptr i8, ptr %c, i64 276 + %2 = load i8, ptr %arrayidx8.434, align 1 + %arrayidx8.1.4 = getelementptr i8, ptr %c, i64 279 + %3 = load i8, ptr %arrayidx8.1.4, align 1 + %arrayidx8.2.4 = getelementptr i8, ptr %c, i64 282 + %4 = load i8, ptr %arrayidx8.2.4, align 1 + %arrayidx8.3.4 = getelementptr i8, ptr %c, i64 285 + %5 = load i8, ptr %arrayidx8.3.4, align 1 + %arrayidx8.4.4 = getelementptr i8, ptr %c, i64 288 + %6 = load i8, ptr %arrayidx8.4.4, align 1 + %7 = load i8, ptr %c, align 1 + %8 = load i8, ptr %c, align 1 + 
%arrayidx8.536 = getelementptr i8, ptr %c, i64 345 + %9 = load i8, ptr %arrayidx8.536, align 1 + %arrayidx8.1.5 = getelementptr i8, ptr %c, i64 348 + %10 = load i8, ptr %arrayidx8.1.5, align 1 + %arrayidx8.2.5 = getelementptr i8, ptr %c, i64 351 + %11 = load i8, ptr %arrayidx8.2.5, align 1 + %arrayidx8.3.5 = getelementptr i8, ptr %c, i64 354 + %12 = load i8, ptr %arrayidx8.3.5, align 1 + %arrayidx8.4.5 = getelementptr i8, ptr %c, i64 357 + %13 = load i8, ptr %arrayidx8.4.5, align 1 + %arrayidx8.5.5 = getelementptr i8, ptr %c, i64 360 + %14 = load i8, ptr %arrayidx8.5.5, align 1 + %arrayidx8.6.5 = getelementptr i8, ptr %c, i64 363 + %15 = load i8, ptr %arrayidx8.6.5, align 1 + br label %for.cond + +for.cond: + %a.promoted2226 = phi i8 [ 0, %entry ], [ %or18.6.5, %for.cond ] + %or18.7.3 = or i8 %0, %1 + %or18.435 = or i8 %or18.7.3, %2 + %or18.1.4 = or i8 %or18.435, %3 + %or18.2.4 = or i8 %or18.1.4, %4 + %or18.3.4 = or i8 %or18.2.4, %5 + %or18.4.4 = or i8 %or18.3.4, %6 + %or18.5.4 = or i8 %or18.4.4, %7 + %or18.6.4 = or i8 %or18.5.4, %8 + %or18.537 = or i8 %or18.6.4, %9 + %or18.1.5 = or i8 %or18.537, %10 + %or18.2.5 = or i8 %or18.1.5, %11 + %or18.3.5 = or i8 %or18.2.5, %12 + %or18.4.5 = or i8 %or18.3.5, %13 + %or18.5.5 = or i8 %or18.4.5, %14 + %or18.6.5 = or i8 %or18.5.5, %15 + br label %for.cond +} + From f4c498bc7399b00bd7b1157645cf03906fbe7954 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 26 Aug 2024 07:09:57 -0700 Subject: [PATCH 19/65] [FixIrreducible]Fix verify call --- llvm/lib/Transforms/Utils/FixIrreducible.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp index 67fb806d3eae54..4bcd85ff2336bd 100644 --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -342,7 +342,7 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT, #if defined(EXPENSIVE_CHECKS) CI.verify(); if (LI) { - LI.verify(DT); + LI->verify(DT); } #endif // EXPENSIVE_CHECKS From 11ba2eee59c6c7269b2dae27247048f828143274 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Mon, 26 Aug 2024 07:15:07 -0700 Subject: [PATCH 20/65] [rtsan][compiler-rt] Disable file permissions test causing build failure (#106079) Related to: ca95bee64972, #105732 --- .../rtsan/tests/rtsan_test_interceptors.cpp | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp index 47c07b3e47abd7..5b88cf64612942 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp @@ -184,19 +184,25 @@ TEST_F(RtsanFileTest, OpenatDiesWhenRealtime) { ExpectNonRealtimeSurvival(func); } -TEST_F(RtsanFileTest, OpenCreatesFileWithProperMode) { - const int mode = S_IRGRP | S_IROTH | S_IRUSR | S_IWUSR; - - const int fd = open(GetTemporaryFilePath(), O_CREAT | O_WRONLY, mode); - ASSERT_THAT(fd, Ne(-1)); - close(fd); - - struct stat st; - ASSERT_THAT(stat(GetTemporaryFilePath(), &st), Eq(0)); - - // Mask st_mode to get permission bits only - ASSERT_THAT(st.st_mode & 0777, Eq(mode)); -} +// FIXME: This fails on the build machines, but not locally! 
+// see https://github.com/llvm/llvm-project/pull/105732#issuecomment-2310286530 +// Value of: st.st_mode & 0777 +// Expected: is equal to 420 +// Actual: 384 +// TEST_F(RtsanFileTest, OpenCreatesFileWithProperMode) { +// const int mode = S_IRGRP | S_IROTH | S_IRUSR | S_IWUSR; +// +// const int fd = open(GetTemporaryFilePath(), O_CREAT | O_WRONLY, mode); +// ASSERT_THAT(fd, Ne(-1)); +// close(fd); +// +// struct stat st; +// ASSERT_THAT(stat(GetTemporaryFilePath(), &st), Eq(0)); +// +// // Mask st_mode to get permission bits only +// +// //ASSERT_THAT(st.st_mode & 0777, Eq(mode)); FAILED ASSERTION +// } TEST_F(RtsanFileTest, CreatDiesWhenRealtime) { auto func = [this]() { creat(GetTemporaryFilePath(), S_IWOTH | S_IROTH); }; From 399d7cce3731096ff20ee6bdb505e18dab468915 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 26 Aug 2024 07:22:17 -0700 Subject: [PATCH 21/65] [CodeGen] Use MachineInstr::all_defs (NFC) (#106017) --- llvm/include/llvm/CodeGen/LiveVariables.h | 4 ++-- llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 3 +-- llvm/lib/CodeGen/MachineInstr.cpp | 21 +++++++------------ llvm/lib/CodeGen/ModuloSchedule.cpp | 4 ++-- llvm/lib/CodeGen/RegAllocFast.cpp | 14 ++++--------- llvm/lib/CodeGen/RegisterCoalescer.cpp | 4 ++-- 6 files changed, 19 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index b73850bb757ec3..89d1b5edf3fa63 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -253,8 +253,8 @@ class LiveVariables { return false; bool Removed = false; - for (MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + for (MachineOperand &MO : MI.all_defs()) { + if (MO.getReg() == Reg) { MO.setIsDead(false); Removed = true; break; diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index bccd9b04cd2c5c..e40248197c7c7c 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -402,8 +402,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // Scan the register defs for this instruction and update // live-ranges. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) continue; + for (const MachineOperand &MO : MI.all_defs()) { Register Reg = MO.getReg(); if (Reg == 0) continue; // Ignore KILLs and passthru registers for liveness... 
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 0f2acdb12389d4..f21910ee3a444a 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -2125,19 +2125,15 @@ bool MachineInstr::addRegisterDead(Register Reg, } void MachineInstr::clearRegisterDeads(Register Reg) { - for (MachineOperand &MO : operands()) { - if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) - continue; - MO.setIsDead(false); - } + for (MachineOperand &MO : all_defs()) + if (MO.getReg() == Reg) + MO.setIsDead(false); } void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) { - for (MachineOperand &MO : operands()) { - if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) - continue; - MO.setIsUndef(IsUndef); - } + for (MachineOperand &MO : all_defs()) + if (MO.getReg() == Reg && MO.getSubReg() != 0) + MO.setIsUndef(IsUndef); } void MachineInstr::addRegisterDefined(Register Reg, @@ -2147,9 +2143,8 @@ void MachineInstr::addRegisterDefined(Register Reg, if (MO) return; } else { - for (const MachineOperand &MO : operands()) { - if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && - MO.getSubReg() == 0) + for (const MachineOperand &MO : all_defs()) { + if (MO.getReg() == Reg && MO.getSubReg() == 0) return; } } diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 78201d9bfb79a9..99c82bc3a2660a 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -2667,8 +2667,8 @@ void ModuloScheduleExpanderMVE::calcNumUnroll() { void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI, ValueMapTy &VRMap, bool LastDef) { - for (MachineOperand &MO : NewMI->operands()) { - if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef()) + for (MachineOperand &MO : NewMI->all_defs()) { + if (!MO.getReg().isVirtual()) continue; Register Reg = MO.getReg(); const TargetRegisterClass *RC = MRI.getRegClass(Reg); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 62f7ed29c8c819..6babd5a3f1f96f 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1329,9 +1329,8 @@ void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) { // we assign these. SmallVector RegClassDefCounts(TRI->getNumRegClasses(), 0); - for (const MachineOperand &MO : MI.operands()) - if (MO.isReg() && MO.isDef()) - addRegClassDefCounts(RegClassDefCounts, MO.getReg()); + for (const MachineOperand &MO : MI.all_defs()) + addRegClassDefCounts(RegClassDefCounts, MO.getReg()); llvm::sort(DefOperandIndexes, [&](unsigned I0, unsigned I1) { const MachineOperand &MO0 = MI.getOperand(I0); @@ -1481,9 +1480,7 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // Assign virtual register defs. while (ReArrangedImplicitOps) { ReArrangedImplicitOps = false; - for (MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; + for (MachineOperand &MO : MI.all_defs()) { Register Reg = MO.getReg(); if (Reg.isVirtual()) { ReArrangedImplicitOps = @@ -1499,10 +1496,7 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) { // Free registers occupied by defs. // Iterate operands in reverse order, so we see the implicit super register // defs first (we added them earlier in case of ). 
- for (MachineOperand &MO : reverse(MI.operands())) { - if (!MO.isReg() || !MO.isDef()) - continue; - + for (MachineOperand &MO : reverse(MI.all_defs())) { Register Reg = MO.getReg(); // subreg defs don't free the full register. We left the subreg number diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index f6c53f3051c2f0..97f8346df0e8fe 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -3230,8 +3230,8 @@ void JoinVals::pruneValues(JoinVals &Other, // Also remove dead flags since the joined live range will // continue past this instruction. for (MachineOperand &MO : - Indexes->getInstructionFromIndex(Def)->operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + Indexes->getInstructionFromIndex(Def)->all_defs()) { + if (MO.getReg() == Reg) { if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef) MO.setIsUndef(false); MO.setIsDead(false); From d9e728601938f7d587ac580d32f042fa74041864 Mon Sep 17 00:00:00 2001 From: yronglin Date: Mon, 26 Aug 2024 22:23:07 +0800 Subject: [PATCH 22/65] [NFC][clang][bytecode] Rename `clang::interp::State::getCtx` to `clang::interp::State::getASTContext` (#106071) The new constant interpreter's `clang::interp::InterpState` contains both `clang::interp::Context` and `clang::ASTContext`. So using `S.Ctx` and `S.getCtx()` was a bit confusing. This PR rename `getCtx()` to `getASTContext` to make things more clearer. Signed-off-by: yronglin --- clang/lib/AST/ByteCode/EvalEmitter.cpp | 2 +- clang/lib/AST/ByteCode/Interp.cpp | 10 ++--- clang/lib/AST/ByteCode/Interp.h | 50 ++++++++++++------------ clang/lib/AST/ByteCode/InterpBuiltin.cpp | 41 +++++++++---------- clang/lib/AST/ByteCode/InterpFrame.cpp | 14 +++---- clang/lib/AST/ByteCode/InterpState.h | 2 +- clang/lib/AST/ByteCode/State.cpp | 11 ++++-- clang/lib/AST/ByteCode/State.h | 2 +- clang/lib/AST/ExprConstant.cpp | 12 +++--- 9 files changed, 75 insertions(+), 69 deletions(-) diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp index 53ec8f52d4921f..3b9e5f9f9f69cd 100644 --- a/clang/lib/AST/ByteCode/EvalEmitter.cpp +++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp @@ -219,7 +219,7 @@ bool EvalEmitter::emitRetValue(const SourceInfo &Info) { return false; if (std::optional APV = - Ptr.toRValue(S.getCtx(), EvalResult.getSourceType())) { + Ptr.toRValue(S.getASTContext(), EvalResult.getSourceType())) { EvalResult.setValue(*APV); return true; } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index aea303f0e630c9..09d3f4525138ed 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -326,7 +326,7 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { auto IsConstType = [&S](const VarDecl *VD) -> bool { QualType T = VD->getType(); - if (T.isConstant(S.getCtx())) + if (T.isConstant(S.getASTContext())) return true; if (S.getLangOpts().CPlusPlus && !S.getLangOpts().CPlusPlus11) @@ -523,9 +523,9 @@ bool CheckGlobalInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { assert(S.getLangOpts().CPlusPlus); const auto *VD = cast(Ptr.getDeclDesc()->asValueDecl()); if ((!VD->hasConstantInitialization() && - VD->mightBeUsableInConstantExpressions(S.getCtx())) || + VD->mightBeUsableInConstantExpressions(S.getASTContext())) || (S.getLangOpts().OpenCL && !S.getLangOpts().CPlusPlus11 && - !VD->hasICEInitializer(S.getCtx()))) { + !VD->hasICEInitializer(S.getASTContext()))) { const SourceInfo &Loc 
= S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_var_init_non_constant, 1) << VD; S.Note(VD->getLocation(), diag::note_declared_at); @@ -797,7 +797,7 @@ bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray, // but we want to get the array size right. if (D->isArray()) { QualType ElemQT = D->getType()->getPointeeType(); - TypeToDiagnose = S.getCtx().getConstantArrayType( + TypeToDiagnose = S.getASTContext().getConstantArrayType( ElemQT, APInt(64, static_cast(D->getNumElems()), false), nullptr, ArraySizeModifier::Normal, 0); } else @@ -819,7 +819,7 @@ bool CheckDeleteSource(InterpState &S, CodePtr OpPC, const Expr *Source, // Whatever this is, we didn't heap allocate it. const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_delete_not_heap_alloc) - << Ptr.toDiagnosticString(S.getCtx()); + << Ptr.toDiagnosticString(S.getASTContext()); if (Ptr.isTemporary()) S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 81c547991c3d7d..242532a3f0544e 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -41,7 +41,7 @@ using APSInt = llvm::APSInt; /// Convert a value to an APValue. template bool ReturnValue(const InterpState &S, const T &V, APValue &R) { - R = V.toAPValue(S.getCtx()); + R = V.toAPValue(S.getASTContext()); return true; } @@ -231,12 +231,12 @@ bool CheckArraySize(InterpState &S, CodePtr OpPC, SizeT *NumElements, // constructing the array, we catch this here. SizeT MaxElements = SizeT::from(Descriptor::MaxArrayElemBytes / ElemSize); if (NumElements->toAPSInt().getActiveBits() > - ConstantArrayType::getMaxSizeBits(S.getCtx()) || + ConstantArrayType::getMaxSizeBits(S.getASTContext()) || *NumElements > MaxElements) { if (!IsNoThrow) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_new_too_large) - << NumElements->toDiagnosticString(S.getCtx()); + << NumElements->toDiagnosticString(S.getASTContext()); } return false; } @@ -911,8 +911,8 @@ inline bool CmpHelper(InterpState &S, CodePtr OpPC, const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -927,7 +927,7 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, if (FP.isWeak()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_weak_comparison) - << FP.toDiagnosticString(S.getCtx()); + << FP.toDiagnosticString(S.getASTContext()); return false; } } @@ -945,8 +945,8 @@ inline bool CmpHelper(InterpState &S, CodePtr OpPC, CompareFn Fn) { if (!Pointer::hasSameBase(LHS, RHS)) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } else { unsigned VL = LHS.getByteOffset(); @@ -974,7 +974,7 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { if (P.isWeak()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_weak_comparison) - << P.toDiagnosticString(S.getCtx()); + << 
P.toDiagnosticString(S.getASTContext()); return false; } } @@ -984,13 +984,13 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { RHS.getOffset() == 0) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_past_end) - << LHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()); return false; } else if (RHS.isOnePastEnd() && !LHS.isOnePastEnd() && !LHS.isZero() && LHS.getOffset() == 0) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_past_end) - << RHS.toDiagnosticString(S.getCtx()); + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -1073,8 +1073,8 @@ bool CMP3(InterpState &S, CodePtr OpPC, const ComparisonCategoryInfo *CmpInfo) { // This should only happen with pointers. const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_unspecified) - << LHS.toDiagnosticString(S.getCtx()) - << RHS.toDiagnosticString(S.getCtx()); + << LHS.toDiagnosticString(S.getASTContext()) + << RHS.toDiagnosticString(S.getASTContext()); return false; } @@ -1342,7 +1342,7 @@ bool InitGlobalTemp(InterpState &S, CodePtr OpPC, uint32_t I, const Pointer &Ptr = S.P.getGlobal(I); const T Value = S.Stk.peek(); - APValue APV = Value.toAPValue(S.getCtx()); + APValue APV = Value.toAPValue(S.getASTContext()); APValue *Cached = Temp->getOrCreateValue(true); *Cached = APV; @@ -1369,7 +1369,7 @@ inline bool InitGlobalTempComp(InterpState &S, CodePtr OpPC, std::make_pair(P.getDeclDesc()->asExpr(), Temp)); if (std::optional APV = - P.toRValue(S.getCtx(), Temp->getTemporaryExpr()->getType())) { + P.toRValue(S.getASTContext(), Temp->getTemporaryExpr()->getType())) { *Cached = *APV; return true; } @@ -1404,7 +1404,8 @@ bool InitThisBitField(InterpState &S, CodePtr OpPC, const Record::Field *F, return false; const Pointer &Field = This.atField(FieldOffset); const auto &Value = S.Stk.pop(); - Field.deref() = Value.truncate(F->Decl->getBitWidthValue(S.getCtx())); + Field.deref() = + Value.truncate(F->Decl->getBitWidthValue(S.getASTContext())); Field.initialize(); return true; } @@ -1427,7 +1428,8 @@ bool InitBitField(InterpState &S, CodePtr OpPC, const Record::Field *F) { assert(F->isBitField()); const T &Value = S.Stk.pop(); const Pointer &Field = S.Stk.peek().atField(F->Offset); - Field.deref() = Value.truncate(F->Decl->getBitWidthValue(S.getCtx())); + Field.deref() = + Value.truncate(F->Decl->getBitWidthValue(S.getASTContext())); Field.activate(); Field.initialize(); return true; @@ -1477,7 +1479,7 @@ inline bool GetPtrField(InterpState &S, CodePtr OpPC, uint32_t Off) { return false; if (Ptr.isIntegralPointer()) { - S.Stk.push(Ptr.asIntPointer().atOffset(S.getCtx(), Off)); + S.Stk.push(Ptr.asIntPointer().atOffset(S.getASTContext(), Off)); return true; } @@ -1505,7 +1507,7 @@ inline bool GetPtrFieldPop(InterpState &S, CodePtr OpPC, uint32_t Off) { return false; if (Ptr.isIntegralPointer()) { - S.Stk.push(Ptr.asIntPointer().atOffset(S.getCtx(), Off)); + S.Stk.push(Ptr.asIntPointer().atOffset(S.getASTContext(), Off)); return true; } @@ -1721,7 +1723,7 @@ bool StoreBitField(InterpState &S, CodePtr OpPC) { if (Ptr.canBeInitialized()) Ptr.initialize(); if (const auto *FD = Ptr.getField()) - Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getCtx())); + Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getASTContext())); else Ptr.deref() = Value; return true; @@ -1736,7 +1738,7 @@ bool StoreBitFieldPop(InterpState 
&S, CodePtr OpPC) { if (Ptr.canBeInitialized()) Ptr.initialize(); if (const auto *FD = Ptr.getField()) - Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getCtx())); + Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getASTContext())); else Ptr.deref() = Value; return true; @@ -2014,7 +2016,7 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) { while (auto *AT = dyn_cast(PtrT)) PtrT = AT->getElementType(); - QualType ArrayTy = S.getCtx().getConstantArrayType( + QualType ArrayTy = S.getASTContext().getConstantArrayType( PtrT, APInt::getZero(1), nullptr, ArraySizeModifier::Normal, 0); S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_pointer_subtraction_zero_size) @@ -2953,7 +2955,7 @@ inline bool CheckDecl(InterpState &S, CodePtr OpPC, const VarDecl *VD) { if (VD == S.EvaluatingDecl) return true; - if (!VD->isUsableInConstantExpressions(S.getCtx())) { + if (!VD->isUsableInConstantExpressions(S.getASTContext())) { S.CCEDiag(VD->getLocation(), diag::note_constexpr_static_local) << (VD->getTSCSpec() == TSCS_unspecified ? 0 : 1) << VD; return false; @@ -3047,7 +3049,7 @@ static inline bool Free(InterpState &S, CodePtr OpPC, bool DeleteIsArrayForm) { if (!Ptr.isRoot() || Ptr.isOnePastEnd() || Ptr.isArrayElement()) { const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_delete_subobject) - << Ptr.toDiagnosticString(S.getCtx()) << Ptr.isOnePastEnd(); + << Ptr.toDiagnosticString(S.getASTContext()) << Ptr.isOnePastEnd(); return false; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 26abf582051067..1a71bff25d2540 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -38,7 +38,7 @@ static T getParam(const InterpFrame *Frame, unsigned Index) { } PrimType getIntPrimType(const InterpState &S) { - const TargetInfo &TI = S.getCtx().getTargetInfo(); + const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned IntWidth = TI.getIntWidth(); if (IntWidth == 32) @@ -49,7 +49,7 @@ PrimType getIntPrimType(const InterpState &S) { } PrimType getLongPrimType(const InterpState &S) { - const TargetInfo &TI = S.getCtx().getTargetInfo(); + const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned LongWidth = TI.getLongWidth(); if (LongWidth == 64) @@ -272,10 +272,10 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, return false; const llvm::fltSemantics &TargetSemantics = - S.getCtx().getFloatTypeSemantics(F->getDecl()->getReturnType()); + S.getASTContext().getFloatTypeSemantics(F->getDecl()->getReturnType()); Floating Result; - if (S.getCtx().getTargetInfo().isNan2008()) { + if (S.getASTContext().getTargetInfo().isNan2008()) { if (Signaling) Result = Floating( llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill)); @@ -303,7 +303,7 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, static bool interp__builtin_inf(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const Function *F) { const llvm::fltSemantics &TargetSemantics = - S.getCtx().getFloatTypeSemantics(F->getDecl()->getReturnType()); + S.getASTContext().getFloatTypeSemantics(F->getDecl()->getReturnType()); S.Stk.push(Floating::getInf(TargetSemantics)); return true; @@ -689,8 +689,8 @@ static bool interp__builtin_eh_return_data_regno(InterpState &S, CodePtr OpPC, PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType()); APSInt Arg = peekToAPSInt(S.Stk, ArgT); - int Result = - 
S.getCtx().getTargetInfo().getEHDataRegisterNumber(Arg.getZExtValue()); + int Result = S.getASTContext().getTargetInfo().getEHDataRegisterNumber( + Arg.getZExtValue()); pushInteger(S, Result, Call->getType()); return true; } @@ -734,7 +734,7 @@ static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC, ResultType->isSignedIntegerOrEnumerationType(); uint64_t LHSSize = LHS.getBitWidth(); uint64_t RHSSize = RHS.getBitWidth(); - uint64_t ResultSize = S.getCtx().getTypeSize(ResultType); + uint64_t ResultSize = S.getASTContext().getTypeSize(ResultType); uint64_t MaxBits = std::max(std::max(LHSSize, RHSSize), ResultSize); // Add an additional bit if the signedness isn't uniformly agreed to. We @@ -794,7 +794,7 @@ static bool interp__builtin_overflowop(InterpState &S, CodePtr OpPC, // since it will give us the behavior of a TruncOrSelf in the case where // its parameter <= its size. We previously set Result to be at least the // type-size of the result, so getTypeSize(ResultType) <= Resu - APSInt Temp = Result.extOrTrunc(S.getCtx().getTypeSize(ResultType)); + APSInt Temp = Result.extOrTrunc(S.getASTContext().getTypeSize(ResultType)); Temp.setIsSigned(ResultType->isSignedIntegerOrEnumerationType()); if (!APSInt::isSameValue(Temp, Result)) @@ -974,8 +974,8 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC, if (Size.isPowerOfTwo()) { // Check against inlining width. unsigned InlineWidthBits = - S.getCtx().getTargetInfo().getMaxAtomicInlineWidth(); - if (Size <= S.getCtx().toCharUnitsFromBits(InlineWidthBits)) { + S.getASTContext().getTargetInfo().getMaxAtomicInlineWidth(); + if (Size <= S.getASTContext().toCharUnitsFromBits(InlineWidthBits)) { // OK, we will inline appropriately-aligned operations of this size, // and _Atomic(T) is appropriately-aligned. @@ -1007,7 +1007,7 @@ static bool interp__builtin_atomic_lock_free(InterpState &S, CodePtr OpPC, if (auto PtrTy = PtrArg->getType()->getAs()) { QualType PointeeType = PtrTy->getPointeeType(); if (!PointeeType->isIncompleteType() && - S.getCtx().getTypeAlignInChars(PointeeType) >= Size) { + S.getASTContext().getTypeAlignInChars(PointeeType) >= Size) { // OK, we will inline operations on this object. 
return returnBool(true); } @@ -1059,7 +1059,7 @@ static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC, S.FFDiag(Call, diag::note_constexpr_invalid_alignment) << Alignment; return false; } - unsigned SrcWidth = S.getCtx().getIntWidth(Call->getArg(0)->getType()); + unsigned SrcWidth = S.getASTContext().getIntWidth(Call->getArg(0)->getType()); APSInt MaxValue(APInt::getOneBitSet(SrcWidth, SrcWidth - 1)); if (APSInt::compareValues(Alignment, MaxValue) > 0) { S.FFDiag(Call, diag::note_constexpr_alignment_too_big) @@ -1094,7 +1094,7 @@ static bool interp__builtin_is_aligned_up_down(InterpState &S, CodePtr OpPC, unsigned PtrOffset = Ptr.getByteOffset(); PtrOffset = Ptr.getIndex(); CharUnits BaseAlignment = - S.getCtx().getDeclAlign(Ptr.getDeclDesc()->asValueDecl()); + S.getASTContext().getDeclAlign(Ptr.getDeclDesc()->asValueDecl()); CharUnits PtrAlign = BaseAlignment.alignmentAtOffset(CharUnits::fromQuantity(PtrOffset)); @@ -1157,7 +1157,7 @@ static bool interp__builtin_os_log_format_buffer_size(InterpState &S, const Function *Func, const CallExpr *Call) { analyze_os_log::OSLogBufferLayout Layout; - analyze_os_log::computeOSLogBufferLayout(S.getCtx(), Call, Layout); + analyze_os_log::computeOSLogBufferLayout(S.getASTContext(), Call, Layout); pushInteger(S, Layout.size().getQuantity(), Call->getType()); return true; } @@ -1624,10 +1624,11 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, const RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; - const ASTRecordLayout &RL = S.getCtx().getASTRecordLayout(RD); + const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(RD); unsigned FieldIndex = MemberDecl->getFieldIndex(); assert(FieldIndex < RL.getFieldCount() && "offsetof field in wrong type"); - Result += S.getCtx().toCharUnitsFromBits(RL.getFieldOffset(FieldIndex)); + Result += + S.getASTContext().toCharUnitsFromBits(RL.getFieldOffset(FieldIndex)); CurrentType = MemberDecl->getType().getNonReferenceType(); break; } @@ -1635,11 +1636,11 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, // When generating bytecode, we put all the index expressions as Sint64 on // the stack. int64_t Index = ArrayIndices[ArrayIndex]; - const ArrayType *AT = S.getCtx().getAsArrayType(CurrentType); + const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType); if (!AT) return false; CurrentType = AT->getElementType(); - CharUnits ElementSize = S.getCtx().getTypeSizeInChars(CurrentType); + CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType); Result += Index * ElementSize; ++ArrayIndex; break; @@ -1656,7 +1657,7 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, const RecordDecl *RD = RT->getDecl(); if (RD->isInvalidDecl()) return false; - const ASTRecordLayout &RL = S.getCtx().getASTRecordLayout(RD); + const ASTRecordLayout &RL = S.getASTContext().getASTRecordLayout(RD); // Find the base class itself. 
CurrentType = BaseSpec->getType(); diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp index 8b55b61cbbfa7e..5e98444ef05a59 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.cpp +++ b/clang/lib/AST/ByteCode/InterpFrame.cpp @@ -179,7 +179,7 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { if (const auto *MCE = dyn_cast_if_present(CallExpr)) { const Expr *Object = MCE->getImplicitObjectArgument(); Object->printPretty(OS, /*Helper=*/nullptr, - S.getCtx().getPrintingPolicy(), + S.getASTContext().getPrintingPolicy(), /*Indentation=*/0); if (Object->getType()->isPointerType()) OS << "->"; @@ -188,18 +188,18 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { } else if (const auto *OCE = dyn_cast_if_present(CallExpr)) { OCE->getArg(0)->printPretty(OS, /*Helper=*/nullptr, - S.getCtx().getPrintingPolicy(), + S.getASTContext().getPrintingPolicy(), /*Indentation=*/0); OS << "."; } else if (const auto *M = dyn_cast(F)) { - print(OS, This, S.getCtx(), - S.getCtx().getLValueReferenceType( - S.getCtx().getRecordType(M->getParent()))); + print(OS, This, S.getASTContext(), + S.getASTContext().getLValueReferenceType( + S.getASTContext().getRecordType(M->getParent()))); OS << "."; } } - F->getNameForDiagnostic(OS, S.getCtx().getPrintingPolicy(), + F->getNameForDiagnostic(OS, S.getASTContext().getPrintingPolicy(), /*Qualified=*/false); OS << '('; unsigned Off = 0; @@ -212,7 +212,7 @@ void InterpFrame::describe(llvm::raw_ostream &OS) const { PrimType PrimTy = S.Ctx.classify(Ty).value_or(PT_Ptr); - TYPE_SWITCH(PrimTy, print(OS, stackRef(Off), S.getCtx(), Ty)); + TYPE_SWITCH(PrimTy, print(OS, stackRef(Off), S.getASTContext(), Ty)); Off += align(primSize(PrimTy)); if (I + 1 != N) OS << ", "; diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index 61ee54331c65d6..961ba5f5c28a09 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -59,7 +59,7 @@ class InterpState final : public State, public SourceMapper { Expr::EvalStatus &getEvalStatus() const override { return Parent.getEvalStatus(); } - ASTContext &getCtx() const override { return Parent.getCtx(); } + ASTContext &getASTContext() const override { return Parent.getASTContext(); } // Forward status checks and updates to the walker. bool checkingForUndefinedBehavior() const override { diff --git a/clang/lib/AST/ByteCode/State.cpp b/clang/lib/AST/ByteCode/State.cpp index 0d9dadec4b9581..b4db86e8d22c71 100644 --- a/clang/lib/AST/ByteCode/State.cpp +++ b/clang/lib/AST/ByteCode/State.cpp @@ -74,12 +74,12 @@ void State::addNotes(ArrayRef Diags) { } DiagnosticBuilder State::report(SourceLocation Loc, diag::kind DiagId) { - return getCtx().getDiagnostics().Report(Loc, DiagId); + return getASTContext().getDiagnostics().Report(Loc, DiagId); } /// Add a diagnostic to the diagnostics list. 
PartialDiagnostic &State::addDiag(SourceLocation Loc, diag::kind DiagId) { - PartialDiagnostic PD(DiagId, getCtx().getDiagAllocator()); + PartialDiagnostic PD(DiagId, getASTContext().getDiagAllocator()); getEvalStatus().Diag->push_back(std::make_pair(Loc, PD)); return getEvalStatus().Diag->back().second; } @@ -93,7 +93,8 @@ OptionalDiagnostic State::diag(SourceLocation Loc, diag::kind DiagId, } unsigned CallStackNotes = getCallStackDepth() - 1; - unsigned Limit = getCtx().getDiagnostics().getConstexprBacktraceLimit(); + unsigned Limit = + getASTContext().getDiagnostics().getConstexprBacktraceLimit(); if (Limit) CallStackNotes = std::min(CallStackNotes, Limit + 1); if (checkingPotentialConstantExpression()) @@ -113,7 +114,9 @@ OptionalDiagnostic State::diag(SourceLocation Loc, diag::kind DiagId, return OptionalDiagnostic(); } -const LangOptions &State::getLangOpts() const { return getCtx().getLangOpts(); } +const LangOptions &State::getLangOpts() const { + return getASTContext().getLangOpts(); +} void State::addCallStack(unsigned Limit) { // Determine which calls to skip, if any. diff --git a/clang/lib/AST/ByteCode/State.h b/clang/lib/AST/ByteCode/State.h index 44d6c037c5ad95..2cffce4bc2ae40 100644 --- a/clang/lib/AST/ByteCode/State.h +++ b/clang/lib/AST/ByteCode/State.h @@ -67,7 +67,7 @@ class State { virtual void setActiveDiagnostic(bool Flag) = 0; virtual void setFoldFailureDiagnostic(bool Flag) = 0; virtual Expr::EvalStatus &getEvalStatus() const = 0; - virtual ASTContext &getCtx() const = 0; + virtual ASTContext &getASTContext() const = 0; virtual bool hasPriorDiagnostic() = 0; virtual unsigned getCallStackDepth() = 0; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 826cc5f58bdf51..d46f57521a97d3 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1030,7 +1030,7 @@ namespace { discardCleanups(); } - ASTContext &getCtx() const override { return Ctx; } + ASTContext &getASTContext() const override { return Ctx; } void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value, EvaluatingDeclKind EDK = EvaluatingDeclKind::Ctor) { @@ -2327,9 +2327,9 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, // In CUDA/HIP device compilation, only device side variables have // constant addresses. 
- if (Info.getCtx().getLangOpts().CUDA && - Info.getCtx().getLangOpts().CUDAIsDevice && - Info.getCtx().CUDAConstantEvalCtx.NoWrongSidedVars) { + if (Info.getASTContext().getLangOpts().CUDA && + Info.getASTContext().getLangOpts().CUDAIsDevice && + Info.getASTContext().CUDAConstantEvalCtx.NoWrongSidedVars) { if ((!Var->hasAttr() && !Var->hasAttr() && !Var->getType()->isCUDADeviceBuiltinSurfaceType() && @@ -5662,7 +5662,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, *Info.CurrentCall, hasSpecificAttr(AS->getAttrs()) && isa(SS)); - auto LO = Info.getCtx().getLangOpts(); + auto LO = Info.getASTContext().getLangOpts(); if (LO.CXXAssumptions && !LO.MSVCCompat) { for (auto *Attr : AS->getAttrs()) { auto *AA = dyn_cast(Attr); @@ -5673,7 +5673,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, if (Assumption->isValueDependent()) return ESR_Failed; - if (Assumption->HasSideEffects(Info.getCtx())) + if (Assumption->HasSideEffects(Info.getASTContext())) continue; bool Value; From 06ccd32bb7b26b84448036c18e7191d9e56adc82 Mon Sep 17 00:00:00 2001 From: Dmitry Vasilyev Date: Mon, 26 Aug 2024 18:26:46 +0400 Subject: [PATCH 23/65] [lldb][NFC] Moved the SharedSocket class to Socket.* (#104787) This is the prerequisite for #104238. --- lldb/include/lldb/Host/Socket.h | 24 +++++ lldb/source/Host/common/Socket.cpp | 76 ++++++++++++++ lldb/tools/lldb-server/lldb-platform.cpp | 126 +++-------------------- 3 files changed, 114 insertions(+), 112 deletions(-) diff --git a/lldb/include/lldb/Host/Socket.h b/lldb/include/lldb/Host/Socket.h index 573c881f727d8f..304a91bdf6741b 100644 --- a/lldb/include/lldb/Host/Socket.h +++ b/lldb/include/lldb/Host/Socket.h @@ -19,6 +19,7 @@ #include "lldb/Utility/Status.h" #ifdef _WIN32 +#include "lldb/Host/Pipe.h" #include "lldb/Host/windows/windows.h" #include #include @@ -32,12 +33,35 @@ namespace lldb_private { #if defined(_WIN32) typedef SOCKET NativeSocket; +typedef lldb::pipe_t shared_fd_t; #else typedef int NativeSocket; +typedef NativeSocket shared_fd_t; #endif +class Socket; class TCPSocket; class UDPSocket; +class SharedSocket { +public: + static const shared_fd_t kInvalidFD; + + SharedSocket(const Socket *socket, Status &error); + + shared_fd_t GetSendableFD() { return m_fd; } + + Status CompleteSending(lldb::pid_t child_pid); + + static Status GetNativeSocket(shared_fd_t fd, NativeSocket &socket); + +private: +#ifdef _WIN32 + Pipe m_socket_pipe; + NativeSocket m_socket; +#endif + shared_fd_t m_fd; +}; + class Socket : public IOObject { public: enum SocketProtocol { diff --git a/lldb/source/Host/common/Socket.cpp b/lldb/source/Host/common/Socket.cpp index 7364a12280cfdd..aabd562b0557c6 100644 --- a/lldb/source/Host/common/Socket.cpp +++ b/lldb/source/Host/common/Socket.cpp @@ -56,10 +56,12 @@ using namespace lldb_private; typedef const char *set_socket_option_arg_type; typedef char *get_socket_option_arg_type; const NativeSocket Socket::kInvalidSocketValue = INVALID_SOCKET; +const shared_fd_t SharedSocket::kInvalidFD = LLDB_INVALID_PIPE; #else // #if defined(_WIN32) typedef const void *set_socket_option_arg_type; typedef void *get_socket_option_arg_type; const NativeSocket Socket::kInvalidSocketValue = -1; +const shared_fd_t SharedSocket::kInvalidFD = Socket::kInvalidSocketValue; #endif // #if defined(_WIN32) static bool IsInterrupted() { @@ -70,6 +72,80 @@ static bool IsInterrupted() { #endif } +SharedSocket::SharedSocket(const Socket *socket, Status &error) { +#ifdef _WIN32 + m_socket = socket->GetNativeSocket(); + 
m_fd = kInvalidFD; + + // Create a pipe to transfer WSAPROTOCOL_INFO to the child process. + error = m_socket_pipe.CreateNew(true); + if (error.Fail()) + return; + + m_fd = m_socket_pipe.GetReadPipe(); +#else + m_fd = socket->GetNativeSocket(); + error = Status(); +#endif +} + +Status SharedSocket::CompleteSending(lldb::pid_t child_pid) { +#ifdef _WIN32 + // Transfer WSAPROTOCOL_INFO to the child process. + m_socket_pipe.CloseReadFileDescriptor(); + + WSAPROTOCOL_INFO protocol_info; + if (::WSADuplicateSocket(m_socket, child_pid, &protocol_info) == + SOCKET_ERROR) { + int last_error = ::WSAGetLastError(); + return Status("WSADuplicateSocket() failed, error: %d", last_error); + } + + size_t num_bytes; + Status error = + m_socket_pipe.WriteWithTimeout(&protocol_info, sizeof(protocol_info), + std::chrono::seconds(10), num_bytes); + if (error.Fail()) + return error; + if (num_bytes != sizeof(protocol_info)) + return Status("WriteWithTimeout(WSAPROTOCOL_INFO) failed: %d bytes", + num_bytes); +#endif + return Status(); +} + +Status SharedSocket::GetNativeSocket(shared_fd_t fd, NativeSocket &socket) { +#ifdef _WIN32 + socket = Socket::kInvalidSocketValue; + // Read WSAPROTOCOL_INFO from the parent process and create NativeSocket. + WSAPROTOCOL_INFO protocol_info; + { + Pipe socket_pipe(fd, LLDB_INVALID_PIPE); + size_t num_bytes; + Status error = + socket_pipe.ReadWithTimeout(&protocol_info, sizeof(protocol_info), + std::chrono::seconds(10), num_bytes); + if (error.Fail()) + return error; + if (num_bytes != sizeof(protocol_info)) { + return Status( + "socket_pipe.ReadWithTimeout(WSAPROTOCOL_INFO) failed: % d bytes", + num_bytes); + } + } + socket = ::WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, + FROM_PROTOCOL_INFO, &protocol_info, 0, 0); + if (socket == INVALID_SOCKET) { + return Status("WSASocket(FROM_PROTOCOL_INFO) failed: error %d", + ::WSAGetLastError()); + } + return Status(); +#else + socket = fd; + return Status(); +#endif +} + struct SocketScheme { const char *m_scheme; const Socket::SocketProtocol m_protocol; diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp index 82a3a0d6b4e51c..75f51132aa9cc6 100644 --- a/lldb/tools/lldb-server/lldb-platform.cpp +++ b/lldb/tools/lldb-server/lldb-platform.cpp @@ -47,108 +47,6 @@ using namespace llvm; // option descriptors for getopt_long_only() -#ifdef _WIN32 -typedef pipe_t shared_fd_t; -const shared_fd_t kInvalidSharedFD = LLDB_INVALID_PIPE; -#else -typedef NativeSocket shared_fd_t; -const shared_fd_t kInvalidSharedFD = Socket::kInvalidSocketValue; -#endif - -class SharedSocket { -public: - SharedSocket(Connection *conn, Status &error) { - m_fd = kInvalidSharedFD; - - const Socket *socket = - static_cast(conn->GetReadObject().get()); - if (socket == nullptr) { - error = Status("invalid conn socket"); - return; - } - -#ifdef _WIN32 - m_socket = socket->GetNativeSocket(); - - // Create a pipe to transfer WSAPROTOCOL_INFO to the child process. - error = m_socket_pipe.CreateNew(true); - if (error.Fail()) - return; - - m_fd = m_socket_pipe.GetReadPipe(); -#else - m_fd = socket->GetNativeSocket(); - error = Status(); -#endif - } - - shared_fd_t GetSendableFD() { return m_fd; } - - Status CompleteSending(lldb::pid_t child_pid) { -#ifdef _WIN32 - // Transfer WSAPROTOCOL_INFO to the child process. 
- m_socket_pipe.CloseReadFileDescriptor(); - - WSAPROTOCOL_INFO protocol_info; - if (::WSADuplicateSocket(m_socket, child_pid, &protocol_info) == - SOCKET_ERROR) { - int last_error = ::WSAGetLastError(); - return Status("WSADuplicateSocket() failed, error: %d", last_error); - } - - size_t num_bytes; - Status error = - m_socket_pipe.WriteWithTimeout(&protocol_info, sizeof(protocol_info), - std::chrono::seconds(10), num_bytes); - if (error.Fail()) - return error; - if (num_bytes != sizeof(protocol_info)) - return Status("WriteWithTimeout(WSAPROTOCOL_INFO) failed: %d bytes", - num_bytes); -#endif - return Status(); - } - - static Status GetNativeSocket(shared_fd_t fd, NativeSocket &socket) { -#ifdef _WIN32 - socket = Socket::kInvalidSocketValue; - // Read WSAPROTOCOL_INFO from the parent process and create NativeSocket. - WSAPROTOCOL_INFO protocol_info; - { - Pipe socket_pipe(fd, LLDB_INVALID_PIPE); - size_t num_bytes; - Status error = - socket_pipe.ReadWithTimeout(&protocol_info, sizeof(protocol_info), - std::chrono::seconds(10), num_bytes); - if (error.Fail()) - return error; - if (num_bytes != sizeof(protocol_info)) { - return Status( - "socket_pipe.ReadWithTimeout(WSAPROTOCOL_INFO) failed: % d bytes", - num_bytes); - } - } - socket = ::WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, - FROM_PROTOCOL_INFO, &protocol_info, 0, 0); - if (socket == INVALID_SOCKET) { - return Status("WSASocket(FROM_PROTOCOL_INFO) failed: error %d", - ::WSAGetLastError()); - } - return Status(); -#else - socket = fd; - return Status(); -#endif - } - -private: -#ifdef _WIN32 - Pipe m_socket_pipe; - NativeSocket m_socket; -#endif - shared_fd_t m_fd; -}; - static int g_debug = 0; static int g_verbose = 0; static int g_server = 0; @@ -259,13 +157,13 @@ static void spawn_process_reaped(lldb::pid_t pid, int signal, int status) { gdbserver_portmap.FreePortForProcess(pid); } -static Status spawn_process(const char *progname, Connection *conn, +static Status spawn_process(const char *progname, const Socket *conn_socket, uint16_t gdb_port, uint16_t port_offset, const lldb_private::Args &args, const std::string &log_file, const StringRef log_channels) { Status error; - SharedSocket shared_socket(conn, error); + SharedSocket shared_socket(conn_socket, error); if (error.Fail()) return error; @@ -363,7 +261,7 @@ int main_platform(int argc, char *argv[]) { StringRef log_channels; // e.g. "lldb process threads:gdb-remote default:linux all" - shared_fd_t fd = kInvalidSharedFD; + shared_fd_t fd = SharedSocket::kInvalidFD; int min_gdbserver_port = 0; int max_gdbserver_port = 0; @@ -480,7 +378,7 @@ int main_platform(int argc, char *argv[]) { } // Print usage and exit if no listening port is specified. - if (listen_host_port.empty() && fd == kInvalidSharedFD) + if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) show_usage = true; if (show_usage || option_error) { @@ -494,7 +392,7 @@ int main_platform(int argc, char *argv[]) { lldb_private::Args inferior_arguments; inferior_arguments.SetArguments(argc, const_cast(argv)); - if (fd != kInvalidSharedFD) { + if (fd != SharedSocket::kInvalidFD) { // Child process will handle the connection and exit. 
Log *log = GetLog(LLDBLog::Platform); if (!listen_host_port.empty()) { @@ -510,13 +408,14 @@ int main_platform(int argc, char *argv[]) { return socket_error; } - Connection *conn = - new ConnectionFileDescriptor(new TCPSocket(socket, true, false)); GDBRemoteCommunicationServerPlatform platform(Socket::ProtocolTcp, "tcp"); if (port_offset > 0) platform.SetPortOffset(port_offset); platform.SetPortMap(std::move(gdbserver_portmap)); - platform.SetConnection(std::unique_ptr(conn)); + platform.SetConnection( + std::unique_ptr(new ConnectionFileDescriptor( + new TCPSocket(socket, /*should_close=*/true, + /*child_processes_inherit=*/false)))); client_handle(platform, inferior_arguments); return 0; } @@ -578,8 +477,11 @@ int main_platform(int argc, char *argv[]) { fprintf(stderr, "no available gdbserver port for connection - dropping...\n"); } else { - error = spawn_process(progname, conn, *available_port, port_offset, - inferior_arguments, log_file, log_channels); + const Socket *conn_socket = + static_cast(conn->GetReadObject().get()); + error = + spawn_process(progname, conn_socket, *available_port, port_offset, + inferior_arguments, log_file, log_channels); if (error.Fail()) { { From 41f2f1f028f2f4aeb6d13950b23d15290bf5dddf Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 26 Aug 2024 16:34:41 +0200 Subject: [PATCH 24/65] [SCCP] Add tests for non-null pointers (NFC) --- llvm/test/Transforms/SCCP/pointer-nonnull.ll | 144 +++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 llvm/test/Transforms/SCCP/pointer-nonnull.ll diff --git a/llvm/test/Transforms/SCCP/pointer-nonnull.ll b/llvm/test/Transforms/SCCP/pointer-nonnull.ll new file mode 100644 index 00000000000000..85367d8a56765e --- /dev/null +++ b/llvm/test/Transforms/SCCP/pointer-nonnull.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=sccp < %s | FileCheck %s + +define i1 @test_no_attr(ptr %p) { +; CHECK-LABEL: define i1 @test_no_attr( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_nonnull(ptr nonnull %p) { +; CHECK-LABEL: define i1 @test_nonnull( +; CHECK-SAME: ptr nonnull [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_nonnull_eq(ptr nonnull %p) { +; CHECK-LABEL: define i1 @test_nonnull_eq( +; CHECK-SAME: ptr nonnull [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp eq ptr %p, null + ret i1 %cmp +} + +define i1 @test_dereferenceable(ptr dereferenceable(4) %p) { +; CHECK-LABEL: define i1 @test_dereferenceable( +; CHECK-SAME: ptr dereferenceable(4) [[P:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[P]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp ne ptr %p, null + ret i1 %cmp +} + +define i1 @test_gep_no_flags(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_no_flags( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_nuw(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_nuw( +; CHECK-SAME: ptr nonnull [[P:%.*]], 
i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr nuw i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_inbounds(ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_gep_inbounds( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr inbounds i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_gep_inbounds_null_pointer_valid(ptr nonnull %p, i64 %x) null_pointer_is_valid { +; CHECK-LABEL: define i1 @test_gep_inbounds_null_pointer_valid( +; CHECK-SAME: ptr nonnull [[P:%.*]], i64 [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[GEP]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr inbounds i8, ptr %p, i64 %x + %cmp = icmp ne ptr %gep, null + ret i1 %cmp +} + +define i1 @test_select(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_select( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr [[P]], ptr [[GEP]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[SEL]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr nuw i8, ptr %p, i64 %x + %sel = select i1 %c, ptr %p, ptr %gep + %cmp = icmp ne ptr %sel, null + ret i1 %cmp +} + +define i1 @test_select_not_nuw(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_select_not_nuw( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], ptr [[P]], ptr [[GEP]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[SEL]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep = getelementptr i8, ptr %p, i64 %x + %sel = select i1 %c, ptr %p, ptr %gep + %cmp = icmp ne ptr %sel, null + ret i1 %cmp +} + +define i1 @test_phi(i1 %c, ptr nonnull %p, i64 %x) { +; CHECK-LABEL: define i1 @test_phi( +; CHECK-SAME: i1 [[C:%.*]], ptr nonnull [[P:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[JOIN:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[X]] +; CHECK-NEXT: br label %[[JOIN]] +; CHECK: [[JOIN]]: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[P]], %[[ENTRY]] ], [ [[GEP]], %[[IF]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[PHI]], null +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + br i1 %c, label %if, label %join + +if: + %gep = getelementptr nuw i8, ptr %p, i64 %x + br label %join + +join: + %phi = phi ptr [ %p, %entry ], [ %gep, %if ] + %cmp = icmp ne ptr %phi, null + ret i1 %cmp +} From 46a4132e167aa44d8ec7776262ce2a0e6d47de59 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 26 Aug 2024 17:56:45 +0300 Subject: [PATCH 25/65] [Instrumentation] Fix EdgeCounts vector size in SetBranchWeights (#99064) --- .../Instrumentation/PGOInstrumentation.cpp | 14 +++++-- .../Coroutines/coro-pgo-setbranchweights.ll | 42 +++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll diff --git 
a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index aacfe39f16fbc4..8dd0cfdb2ae0ab 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1658,11 +1658,17 @@ void PGOUseFunc::setBranchWeights() {
       continue;
 
     // We have a non-zero Branch BB.
-    unsigned Size = BBCountInfo.OutEdges.size();
-    SmallVector EdgeCounts(Size, 0);
+
+    // SuccessorCount can be greater than OutEdgesCount, because
+    // removed edges don't appear in OutEdges.
+    unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
+    unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
+    assert(OutEdgesCount <= SuccessorCount);
+
+    SmallVector EdgeCounts(SuccessorCount, 0);
     uint64_t MaxCount = 0;
-    for (unsigned s = 0; s < Size; s++) {
-      const PGOUseEdge *E = BBCountInfo.OutEdges[s];
+    for (unsigned It = 0; It < OutEdgesCount; It++) {
+      const PGOUseEdge *E = BBCountInfo.OutEdges[It];
       const BasicBlock *SrcBB = E->SrcBB;
       const BasicBlock *DestBB = E->DestBB;
       if (DestBB == nullptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll b/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll
new file mode 100644
index 00000000000000..4f5f936606ca3f
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll
@@ -0,0 +1,42 @@
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+; RUN: opt < %t/a.ll --passes=pgo-instr-use -pgo-test-profile-file=%t/a.profdata
+
+;--- a.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-redhat-linux-gnu"
+
+define void @_bar() presplitcoroutine personality ptr null {
+  %1 = call token @llvm.coro.save(ptr null)
+  %2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %2, label %5 [
+    i8 0, label %3
+    i8 1, label %4
+  ]
+
+3: ; preds = %0
+  ret void
+
+4: ; preds = %0
+  ret void
+
+5: ; preds = %0
+  ret void
+}
+
+declare token @llvm.coro.save(ptr)
+
+declare i8 @llvm.coro.suspend(token, i1)
+
+;--- a.proftext
+# IR level Instrumentation Flag
+:ir
+
+_bar
+# Func Hash:
+1063705160175073211
+# Num Counters:
+2
+1
+0

From a195e2d461dde7c73c6dd24da097affb1a7b6f78 Mon Sep 17 00:00:00 2001
From: Kareem Ergawy
Date: Mon, 26 Aug 2024 17:08:24 +0200
Subject: [PATCH 26/65] [MLIR][OpenMP] Handle privatization for global values in MLIR->LLVM translation (#104407)

Potential fix for https://github.com/llvm/llvm-project/issues/102939 and
https://github.com/llvm/llvm-project/issues/102949.

The issues occur because the CodeExtractor component only collects inputs
(to the parallel regions) that are defined in the same function in which the
parallel region is present. However, this is problematic because if we are
privatizing a global value (e.g. a `target` variable which is emitted as a
global), then we miss finding that input and we do not privatize the
variable.

This commit attempts to fix the issue by adding a flag to the CodeExtractor
so that we can collect global inputs.
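
For illustration only, here is a rough sketch (not part of this patch) of how
a caller such as the OpenMP IR builder is expected to use the new flag. The
CodeExtractor, ValueSet, findInputsOutputs, and CollectGlobalInputs names come
from the diff below; `Blocks` is a placeholder for the basic blocks of the
outlined parallel region:

  // Collect the region's inputs, now including referenced global variables.
  CodeExtractor Extractor(Blocks);
  CodeExtractor::ValueSet Inputs, Outputs, SinkingCands;
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
                              /*CollectGlobalInputs=*/true);
  // A GlobalVariable used inside the region (e.g. the privatized `target`
  // variable) now shows up in Inputs, so the delayed-privatization logic can
  // run its alloc/copy regions on it.

The flag defaults to false, so other CodeExtractor users keep the existing
behaviour.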
--- ...privatization-allocatable-firstprivate.f90 | 5 +- .../llvm/Transforms/Utils/CodeExtractor.h | 3 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 11 ++++- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 ++- .../Target/LLVMIR/openmp-firstprivate.mlir | 46 +++++++++++++++++++ 5 files changed, 65 insertions(+), 7 deletions(-) diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 index 833976ff284a86..5f09371bbaba2e 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 @@ -57,6 +57,5 @@ end program compilation_to_obj ! LLVM: @[[GLOB_VAR:[^[:space:]]+]]t = internal global ! LLVM: define internal void @_QQmain..omp_par -! LLVM: %[[LOCAL_VAR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 -! LLVM-NEXT: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8 -! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %[[LOCAL_VAR]], align 8 +! LLVM: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8 +! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %{{.*}}, align 8 diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 68eb00a50fe030..826347e79f7195 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -187,7 +187,8 @@ class CodeExtractorAnalysisCache { /// sets, before extraction occurs. These modifications won't have any /// significant impact on the cost however. void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, - const ValueSet &Allocas) const; + const ValueSet &Allocas, + bool CollectGlobalInputs = false) const; /// Check if life time marker nodes can be hoisted/sunk into the outline /// region. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 70a6e74b94d55c..532313a31fc132 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1548,7 +1548,16 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BasicBlock *CommonExit = nullptr; SetVector Inputs, Outputs, SinkingCands, HoistingCands; Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); - Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); + + Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands, + /*CollectGlobalInputs=*/true); + + Inputs.remove_if([&](Value *I) { + if (auto *GV = dyn_cast_if_present(I)) + return GV->getValueType() == OpenMPIRBuilder::Ident; + + return false; + }); LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 81d3243c887fce..d378c6c3a4b01c 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -632,14 +632,17 @@ bool CodeExtractor::isEligible() const { } void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, - const ValueSet &SinkCands) const { + const ValueSet &SinkCands, + bool CollectGlobalInputs) const { for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. 
If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { for (auto &OI : II.operands()) { Value *V = OI; - if (!SinkCands.count(V) && definedInCaller(Blocks, V)) + if (!SinkCands.count(V) && + (definedInCaller(Blocks, V) || + (CollectGlobalInputs && llvm::isa(V)))) Inputs.insert(V); } diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir index b06ad96f4592c5..02ce6b5b19ceaf 100644 --- a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir +++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir @@ -156,3 +156,49 @@ llvm.func @foo() // CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1 // CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1 // CHECK: call void @foo() + +// ----- + +// Verifies fix for https://github.com/llvm/llvm-project/issues/102939. +// +// The issues occurs because the CodeExtractor component only collect inputs +// (to the parallel regions) that are defined in the same function in which the +// parallel regions is present. Howerver, this is problematic because if we are +// privatizing a global value (e.g. a `target` variable which is emitted as a +// global), then we miss finding that input and we do not privatize the +// variable. + +omp.private {type = firstprivate} @global_privatizer : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x f32 {bindc_name = "global", pinned} : (i64) -> !llvm.ptr + omp.yield(%1 : !llvm.ptr) +} copy { +^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.load %arg0 : !llvm.ptr -> f32 + llvm.store %0, %arg1 : f32, !llvm.ptr + omp.yield(%arg1 : !llvm.ptr) +} + +llvm.func @global_accessor() { + %global_addr = llvm.mlir.addressof @global : !llvm.ptr + omp.parallel private(@global_privatizer %global_addr -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(3.140000e+00 : f32) : f32 + llvm.store %1, %arg0 : f32, !llvm.ptr + omp.terminator + } + llvm.return +} + +llvm.mlir.global internal @global() {addr_space = 0 : i32} : f32 { + %0 = llvm.mlir.zero : f32 + llvm.return %0 : f32 +} + +// CHECK-LABEL: @global_accessor..omp_par({{.*}}) +// CHECK-NEXT: omp.par.entry: +// Verify that we found the privatizer by checking that we properly inlined the +// bodies of the alloc and copy regions. +// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i64 1, align 4 +// CHECK: %[[GLOB_VAL:.*]] = load float, ptr @global, align 4 +// CHECK: store float %[[GLOB_VAL]], ptr %[[PRIV_ALLOC]], align 4 From 0e24c32a6d6659fb4aa61ad52f068dbf6cb685c7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 26 Aug 2024 17:13:09 +0200 Subject: [PATCH 27/65] [SCCP] Avoid some uses of SCCPSolver::isOverdefined (NFCI) This is a confusingly named helper than means "is not unknown, undef or constant". Prefer the more obvious ValueLattice API instead. Most of these checks are for values which are forced to overdefined by undef resolution, in which case only actual overdefined values are relevant. --- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 670d88ac7cf8fa..c6f355a07d9c7f 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -1364,7 +1364,7 @@ void SCCPInstVisitor::visitInsertValueInst(InsertValueInst &IVI) { // resolvedUndefsIn might mark I as overdefined. 
Bail out, even if we would // discover a concrete value later. - if (SCCPSolver::isOverdefined(ValueState[&IVI])) + if (ValueState[&IVI].isOverdefined()) return (void)markOverdefined(&IVI); // If this has more than one index, we can't handle it, drive all results to @@ -1436,7 +1436,7 @@ void SCCPInstVisitor::visitUnaryOperator(Instruction &I) { ValueLatticeElement &IV = ValueState[&I]; // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would // discover a concrete value later. - if (SCCPSolver::isOverdefined(IV)) + if (IV.isOverdefined()) return (void)markOverdefined(&I); // If something is unknown/undef, wait for it to resolve. @@ -1461,7 +1461,7 @@ void SCCPInstVisitor::visitFreezeInst(FreezeInst &I) { ValueLatticeElement &IV = ValueState[&I]; // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would // discover a concrete value later. - if (SCCPSolver::isOverdefined(IV)) + if (IV.isOverdefined()) return (void)markOverdefined(&I); // If something is unknown/undef, wait for it to resolve. @@ -1541,7 +1541,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { void SCCPInstVisitor::visitCmpInst(CmpInst &I) { // Do not cache this lookup, getValueState calls later in the function might // invalidate the reference. - if (SCCPSolver::isOverdefined(ValueState[&I])) + if (ValueState[&I].isOverdefined()) return (void)markOverdefined(&I); Value *Op1 = I.getOperand(0); @@ -1571,7 +1571,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) { // Handle getelementptr instructions. If all operands are constants then we // can turn this into a getelementptr ConstantExpr. void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { - if (SCCPSolver::isOverdefined(ValueState[&I])) + if (ValueState[&I].isOverdefined()) return (void)markOverdefined(&I); SmallVector Operands; @@ -1582,9 +1582,6 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { if (State.isUnknownOrUndef()) return; // Operands are not resolved yet. 
- if (SCCPSolver::isOverdefined(State)) - return (void)markOverdefined(&I); - if (Constant *C = getConstant(State, I.getOperand(i)->getType())) { Operands.push_back(C); continue; From ea625f48ab50b45da39e2a52b4287c908a1c3efc Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Mon, 26 Aug 2024 18:21:34 +0300 Subject: [PATCH 28/65] [Xtensa] Implement sextload i8 (#106053) --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 3 ++- llvm/test/CodeGen/Xtensa/load.ll | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Xtensa/load.ll diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index c7675c2f501761..0d2ce26a942e03 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -70,11 +70,12 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); - // No sign extend instructions for i1 + // No sign extend instructions for i1 and sign extend load i8 for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } setOperationAction(ISD::ConstantPool, PtrVT, Custom); diff --git a/llvm/test/CodeGen/Xtensa/load.ll b/llvm/test/CodeGen/Xtensa/load.ll new file mode 100644 index 00000000000000..2f730f56eb1f51 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/load.ll @@ -0,0 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=xtensa < %s | FileCheck %s + +define signext i8 @test_load_i8(ptr %p){ +; CHECK-LABEL: test_load_i8: +; CHECK: l8ui a8, a2, 0 +; CHECK-NEXT: slli a8, a8, 24 +; CHECK-NEXT: srai a2, a8, 24 +; CHECK-NEXT: ret + %1 = load i8, ptr %p, align 1 + ret i8 %1 +} From 762cb44581cf1397e76a4901e7a142ca6b0a51bb Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 26 Aug 2024 08:47:49 -0700 Subject: [PATCH 29/65] [Mips] Use a range-based for loop (NFC) (#106004) --- .../Target/Mips/MipsConstantIslandPass.cpp | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index b2ba0f8fe74dc9..311b73710fb7a1 100644 --- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -1631,28 +1631,26 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { void MipsConstantIslands::prescanForConstants() { for (MachineBasicBlock &B : *MF) { - for (MachineBasicBlock::instr_iterator I = B.instr_begin(), - EB = B.instr_end(); - I != EB; ++I) { - switch(I->getDesc().getOpcode()) { + for (MachineInstr &MI : B) { + switch (MI.getDesc().getOpcode()) { case Mips::LwConstant32: { PrescannedForConstants = true; - LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n"); - LLVM_DEBUG(dbgs() << "num operands " << I->getNumOperands() << "\n"); - MachineOperand &Literal = I->getOperand(1); + LLVM_DEBUG(dbgs() << "constant island constant " << MI << "\n"); + LLVM_DEBUG(dbgs() << "num operands " << MI.getNumOperands() << "\n"); + MachineOperand &Literal = MI.getOperand(1); if (Literal.isImm()) { int64_t V = Literal.getImm(); LLVM_DEBUG(dbgs() << "literal " << V << "\n"); Type *Int32Ty = 
Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, V); unsigned index = MCP->getConstantPoolIndex(C, Align(4)); - I->getOperand(2).ChangeToImmediate(index); - LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n"); - I->setDesc(TII->get(Mips::LwRxPcTcp16)); - I->removeOperand(1); - I->removeOperand(1); - I->addOperand(MachineOperand::CreateCPI(index, 0)); - I->addOperand(MachineOperand::CreateImm(4)); + MI.getOperand(2).ChangeToImmediate(index); + LLVM_DEBUG(dbgs() << "constant island constant " << MI << "\n"); + MI.setDesc(TII->get(Mips::LwRxPcTcp16)); + MI.removeOperand(1); + MI.removeOperand(1); + MI.addOperand(MachineOperand::CreateCPI(index, 0)); + MI.addOperand(MachineOperand::CreateImm(4)); } break; } From bc695f522743e5408dbdfa77209106c235218654 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 26 Aug 2024 08:49:47 -0700 Subject: [PATCH 30/65] [libc++][test] Add missing in is_always_lock_free test (#105966) That test was using std::same_as without including . --- .../{is_always_lock_free.cpp => is_always_lock_free.pass.cpp} | 1 + 1 file changed, 1 insertion(+) rename libcxx/test/std/atomics/atomics.lockfree/{is_always_lock_free.cpp => is_always_lock_free.pass.cpp} (99%) diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp similarity index 99% rename from libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp rename to libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp index 2dc7f5c7654193..db17221e515d3a 100644 --- a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp +++ b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include "test_macros.h" From 5a288b9183ca3b7d2bad2b39670803e0ca195f09 Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Mon, 26 Aug 2024 17:50:32 +0200 Subject: [PATCH 31/65] [Clang] Evaluate dependent indexes of pack indexing in a constant context (#106054) Fixes #105900 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/TreeTransform.h | 23 +++++++++++++----- clang/test/SemaCXX/cxx2c-pack-indexing.cpp | 28 ++++++++++++++++++++++ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2c6c7e083b9c91..7a9e7f0b4eba69 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -309,6 +309,8 @@ Bug Fixes to C++ Support template depth than the friend function template. (#GH98258) - Clang now rebuilds the template parameters of out-of-line declarations and specializations in the context of the current instantiation in all cases. 
+- Fix evaluation of the index of dependent pack indexing expressions/types specifiers (#GH105900) + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 62287c2d26375c..b3854cd8f82220 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -6669,9 +6669,15 @@ QualType TreeTransform::TransformPackIndexingType(TypeLocBuilder &TLB, PackIndexingTypeLoc TL) { // Transform the index - ExprResult IndexExpr = getDerived().TransformExpr(TL.getIndexExpr()); - if (IndexExpr.isInvalid()) - return QualType(); + ExprResult IndexExpr; + { + EnterExpressionEvaluationContext ConstantContext( + SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + IndexExpr = getDerived().TransformExpr(TL.getIndexExpr()); + if (IndexExpr.isInvalid()) + return QualType(); + } QualType Pattern = TL.getPattern(); const PackIndexingType *PIT = TL.getTypePtr(); @@ -15299,9 +15305,14 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { return E; // Transform the index - ExprResult IndexExpr = getDerived().TransformExpr(E->getIndexExpr()); - if (IndexExpr.isInvalid()) - return ExprError(); + ExprResult IndexExpr; + { + EnterExpressionEvaluationContext ConstantContext( + SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + IndexExpr = getDerived().TransformExpr(E->getIndexExpr()); + if (IndexExpr.isInvalid()) + return ExprError(); + } SmallVector ExpandedExprs; if (!E->expandsToEmptyPack() && E->getExpressions().empty()) { diff --git a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp index 9ea90a4c3e30fd..7d7e808746217f 100644 --- a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp +++ b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp @@ -231,3 +231,31 @@ struct type_info { namespace GH93650 { auto func(auto... inputArgs) { return typeid(inputArgs...[0]); } } // namespace GH93650 + + +namespace GH105900 { + +template +struct types { + template + static constexpr __SIZE_TYPE__ get_index() { return idx; } + + template + static auto x() -> opts...[get_index()] {} +}; + +template +struct vars { + template + static constexpr __SIZE_TYPE__ get_index() { return idx; } + + template + static auto x() -> decltype(opts...[get_index()]) {return 0;} +}; + +void f() { + types::x<0>(); + vars<0>::x<0>(); +} + +} From 0f58ab851c4e9e5cb962e34144c4169dd675389b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 26 Aug 2024 11:51:28 -0400 Subject: [PATCH 32/65] [libc++] Undo unintended renaming in bc695f522743 Renaming the test will require fixing additional issues, which I will tackle in a separate patch. --- .../{is_always_lock_free.pass.cpp => is_always_lock_free.cpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename libcxx/test/std/atomics/atomics.lockfree/{is_always_lock_free.pass.cpp => is_always_lock_free.cpp} (100%) diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp similarity index 100% rename from libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp rename to libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp From ee737c30da0418917935b23848d4760c8324de93 Mon Sep 17 00:00:00 2001 From: yifeizh2 Date: Mon, 26 Aug 2024 23:52:42 +0800 Subject: [PATCH 33/65] [mlir][linalg] Exclude non-convolutional ops from isaConvolutionOpInterface (#102087) Enhance `isaConvolutionOpInterface` logic. 
Currently, `isaConvolutionOpInterface` returns false positive for linalg binary elementwise ops, because the function's underlying logic does not require the input linalg op to have convolved dims. We avoid such false positive by further checking the non-emptyness of convolved dims. --- .../mlir/Dialect/Linalg/IR/LinalgInterfaces.h | 11 ++++++++-- .../Dialect/Linalg/IR/LinalgInterfaces.cpp | 20 ++++++++++++------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h index 08afdf373f014a..0fcaa96ade4031 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h @@ -110,8 +110,12 @@ struct ConvolutionDimensions { FailureOr inferConvolutionDims(LinalgOp linalgOp); /// Checks whether `linalgOp` conforms to ConvolutionOpInterface. +/// By default, we require the `linalgOp` to have non-empty convolved dims +/// (implicitly non-empty `output_image` and `filter_loop`). +/// Users can loosen the constraint by setting `allowEmptyConvolvedDims` to true // TODO: embed within `isa` if possible / natural. -bool isaConvolutionOpInterface(LinalgOp linalgOp); +bool isaConvolutionOpInterface(LinalgOp linalgOp, + bool allowEmptyConvolvedDims = false); /// Checks whether `linalgOp` is semantically equivalent to a `linalg.copyOp`. bool isaCopyOpInterface(LinalgOp linalgOp); @@ -175,9 +179,12 @@ enum class MatchConvolutionResult; /// Checks whether `op` conforms to ConvolutionOpInterface and populates /// `dimensions` with indexes of the different kinds of dimensions when /// present. +/// If `allowEmptyConvolvedDims` is not set, we further checks whether the `op` +/// contains convolved dims. MatchConvolutionResult isConvolutionInterfaceImpl(Operation *op, - ConvolutionDimensions *dimensions = nullptr); + ConvolutionDimensions *dimensions = nullptr, + bool allowEmptyConvolvedDims = false); /// Returns the error message corresponding to the convolution checking return /// code. 
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index 6ee1810c2ff2b9..a38b20eed3a00c 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -762,13 +762,15 @@ enum class MatchConvolutionResult { NotProjectedPermutations, NonConvolutionLoop, OutputDimsNotParallel, - NonOutputDimNotReduction + NonOutputDimNotReduction, + EmptyConvolvedDims }; } // namespace mlir::linalg::detail mlir::linalg::detail::MatchConvolutionResult mlir::linalg::detail::isConvolutionInterfaceImpl( - Operation *op, ConvolutionDimensions *dimensions) { + Operation *op, ConvolutionDimensions *dimensions, + bool allowEmptyConvolvedDims) { auto linalgOp = dyn_cast(op); if (!linalgOp) return MatchConvolutionResult::NotLinalgOp; @@ -886,10 +888,12 @@ mlir::linalg::detail::isConvolutionInterfaceImpl( if (allLoopDims.size() != linalgOp.getNumLoops()) return MatchConvolutionResult::NonConvolutionLoop; + if (!allowEmptyConvolvedDims && inputExprWalker.convolvedDims.empty()) + return MatchConvolutionResult::EmptyConvolvedDims; + if (dimensions) { - FailureOr res = - inferConvolutionDimsImpl(linalgOp, inputExprWalker, - /*allowEmptyConvolvedDims=*/true); + FailureOr res = inferConvolutionDimsImpl( + linalgOp, inputExprWalker, allowEmptyConvolvedDims); assert(succeeded(res) && "unexpected failure to infer convolution dims"); *dimensions = *res; } @@ -920,8 +924,10 @@ mlir::linalg::detail::getMatchConvolutionMessage(MatchConvolutionResult res) { llvm_unreachable("unhandled MatchConvolutionResult case"); } -bool mlir::linalg::isaConvolutionOpInterface(LinalgOp linalgOp) { - return linalg::detail::isConvolutionInterfaceImpl(linalgOp.getOperation()) == +bool mlir::linalg::isaConvolutionOpInterface(LinalgOp linalgOp, + bool allowEmptyConvolvedDims) { + return linalg::detail::isConvolutionInterfaceImpl( + linalgOp.getOperation(), nullptr, allowEmptyConvolvedDims) == linalg::detail::MatchConvolutionResult::Success; } From e04d124a96311d3714522125bf703950863e0540 Mon Sep 17 00:00:00 2001 From: Christopher Di Bella Date: Mon, 26 Aug 2024 08:58:35 -0700 Subject: [PATCH 34/65] [libc++] Call basic_string_view's assume-valid constructor from basic_string operations (#105863) `basic_string` frequently calls `basic_string_view(data(), size())`, which accounts for ~15% of the observed overhead when hardening is enabled. This commit removes unnecessary checks when `basic_string` is known to already have valid data, by bypassing the public constructor, so that we eliminate that overhead. 
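For illustration, the mechanism relied on here reduces to a tag-gated private constructor exposed to a single befriended owner type. The following standalone sketch uses hypothetical `View`/`String`/`AssumeValid` names (it is not the libc++ code) to show the shape of that bypass:

```
#include <cassert>
#include <cstddef>
#include <cstring>

class View {
public:
  // Public constructor: performs the (potentially costly) validity check.
  View(const char *Data, std::size_t Size) : Data_(Data), Size_(Size) {
    assert((Data != nullptr || Size == 0) && "invalid view");
  }
  const char *data() const { return Data_; }
  std::size_t size() const { return Size_; }

private:
  struct AssumeValid {}; // tag: the caller vouches for the invariant
  View(AssumeValid, const char *Data, std::size_t Size)
      : Data_(Data), Size_(Size) {} // unchecked path

  const char *Data_;
  std::size_t Size_;

  friend class String; // only the owning type may skip the check
};

class String {
public:
  explicit String(const char *S) : Data_(S), Size_(std::strlen(S)) {}
  // data() and size() already satisfy View's invariant, so re-checking on
  // every conversion would be pure overhead.
  operator View() const { return View(View::AssumeValid{}, Data_, Size_); }

private:
  const char *Data_;
  std::size_t Size_;
};

int main() {
  String S("hello");
  View V = S; // conversion goes through the unchecked constructor
  return V.size() == 5 ? 0 : 1;
}
```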
--- libcxx/include/string | 12 ++++++------ libcxx/include/string_view | 4 ++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index 6e93a6230cc2c0..cdc1afedbdf52f 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -1213,7 +1213,7 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 operator __self_view() const _NOEXCEPT { - return __self_view(data(), size()); + return __self_view(typename __self_view::__assume_valid(), data(), size()); } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS basic_string& @@ -1822,7 +1822,7 @@ public: #if _LIBCPP_STD_VER >= 20 constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(__self_view __sv) const noexcept { - return __self_view(data(), size()).starts_with(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).starts_with(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept { @@ -1834,7 +1834,7 @@ public: } constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(__self_view __sv) const noexcept { - return __self_view(data(), size()).ends_with(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).ends_with(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept { @@ -1848,15 +1848,15 @@ public: #if _LIBCPP_STD_VER >= 23 constexpr _LIBCPP_HIDE_FROM_ABI bool contains(__self_view __sv) const noexcept { - return __self_view(data(), size()).contains(__sv); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__sv); } constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept { - return __self_view(data(), size()).contains(__c); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__c); } constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* __s) const { - return __self_view(data(), size()).contains(__s); + return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__s); } #endif diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 2a03ee99e9ab52..cf97e3a9be314d 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -211,6 +211,7 @@ namespace std { #include <__functional/hash.h> #include <__functional/unary_function.h> #include <__fwd/ostream.h> +#include <__fwd/string.h> #include <__fwd/string_view.h> #include <__iterator/bounded_iter.h> #include <__iterator/concepts.h> @@ -689,6 +690,9 @@ private: const value_type* __data_; size_type __size_; + + template + friend class basic_string; }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(basic_string_view); From 121ed07975c087291ac7faf681042d51211b9f97 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 26 Aug 2024 09:05:34 -0700 Subject: [PATCH 35/65] [MC][NFC] Count pseudo probes and function records MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-parse pseudo probes section counting the number of probes and function records. These numbers are used in follow-up diff to pre-allocate vectors for decoded probes and inline tree nodes. Additional benefit is avoiding error handling during parsing. This pre-parsing is fast: for a 404MiB .pseudo_probe section with 43373881 probes and 25228770 function records, it only takes 0.68±0.01s. The total time of buildAddress2ProbeMap is 21s. 
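As a rough standalone sketch of the count-then-allocate idea (hypothetical `Record`/`countRecords`/`decode` names, not the MCPseudoProbeDecoder API): the first pass only counts, so the second pass can size its storage exactly once and fill it without any reallocation:

```
#include <cstdint>
#include <vector>

struct Record {
  uint8_t Value;
};

// Pass 1: walk the encoded buffer only to count records (here, a record is
// simply a non-zero byte).
static std::size_t countRecords(const uint8_t *Data, std::size_t Size) {
  std::size_t N = 0;
  for (std::size_t I = 0; I < Size; ++I)
    N += Data[I] != 0;
  return N;
}

// Pass 2: reserve exactly once, then decode. No reallocation can happen,
// so pointers and references into Out stay valid while decoding.
static std::vector<Record> decode(const uint8_t *Data, std::size_t Size) {
  std::vector<Record> Out;
  Out.reserve(countRecords(Data, Size));
  for (std::size_t I = 0; I < Size; ++I)
    if (Data[I] != 0)
      Out.push_back(Record{Data[I]});
  return Out;
}

int main() {
  const uint8_t Buf[] = {1, 0, 2, 3, 0, 4};
  return decode(Buf, sizeof(Buf)).size() == 4 ? 0 : 1;
}
```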
Reviewers: dcci, maksfb, rafaelauler, wlei-llvm, ayermolo Reviewed By: wlei-llvm Pull Request: https://github.com/llvm/llvm-project/pull/102774 --- bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 1 - llvm/include/llvm/MC/MCPseudoProbe.h | 6 + llvm/lib/MC/MCPseudoProbe.cpp | 143 +++++++++++++++++------ 3 files changed, 113 insertions(+), 37 deletions(-) diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 886bbdbf9d686e..37a5b937ebcaa3 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -143,7 +143,6 @@ void PseudoProbeRewriter::parsePseudoProbe() { if (!ProbeDecoder.buildAddress2ProbeMap( reinterpret_cast(Contents.data()), Contents.size(), GuidFilter, FuncStartAddrs)) { - ProbeDecoder.getAddress2ProbesMap().clear(); errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n"; return; } diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 13ad1c38f3b3b0..3dd10c0717679b 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -370,6 +370,12 @@ class MCPseudoProbeDecoder { // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); + // Decode pseudo_probe section to count the number of probes and inlined + // function records for each function record. + template + bool countRecords(bool &Discard, uint32_t &ProbeCount, uint32_t &InlinedCount, + const Uint64Set &GuidFilter); + // Decode pseudo_probe section to build address to probes map for specifed // functions only. bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 3f6f605149b479..77ac1fee4120f5 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" @@ -429,17 +430,11 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( Index = Cur->getChildren().size(); } else { // Read inline site for inlinees - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) - return false; - Index = std::move(*ErrorOrIndex); + Index = cantFail(errorOrToExpected(readUnsignedNumber())); } // Read guid - auto ErrorOrCurGuid = readUnencodedNumber(); - if (!ErrorOrCurGuid) - return false; - uint64_t Guid = std::move(*ErrorOrCurGuid); + uint64_t Guid = cantFail(errorOrToExpected(readUnencodedNumber())); // Decide if top-level node should be disgarded. if (IsTopLevelFunc && !GuidFilter.empty() && !GuidFilter.count(Guid)) @@ -457,41 +452,27 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( } // Read number of probes in the current node. 
- auto ErrorOrNodeCount = readUnsignedNumber(); - if (!ErrorOrNodeCount) - return false; - uint32_t NodeCount = std::move(*ErrorOrNodeCount); + uint32_t NodeCount = + cantFail(errorOrToExpected(readUnsignedNumber())); // Read number of direct inlinees - auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); - if (!ErrorOrCurChildrenToProcess) - return false; + uint32_t ChildrenToProcess = + cantFail(errorOrToExpected(readUnsignedNumber())); // Read all probes in this node for (std::size_t I = 0; I < NodeCount; I++) { // Read index - auto ErrorOrIndex = readUnsignedNumber(); - if (!ErrorOrIndex) - return false; - uint32_t Index = std::move(*ErrorOrIndex); + uint32_t Index = + cantFail(errorOrToExpected(readUnsignedNumber())); // Read type | flag. - auto ErrorOrValue = readUnencodedNumber(); - if (!ErrorOrValue) - return false; - uint8_t Value = std::move(*ErrorOrValue); + uint8_t Value = cantFail(errorOrToExpected(readUnencodedNumber())); uint8_t Kind = Value & 0xf; uint8_t Attr = (Value & 0x70) >> 4; // Read address uint64_t Addr = 0; if (Value & 0x80) { - auto ErrorOrOffset = readSignedNumber(); - if (!ErrorOrOffset) - return false; - int64_t Offset = std::move(*ErrorOrOffset); + int64_t Offset = cantFail(errorOrToExpected(readSignedNumber())); Addr = LastAddr + Offset; } else { - auto ErrorOrAddr = readUnencodedNumber(); - if (!ErrorOrAddr) - return false; - Addr = std::move(*ErrorOrAddr); + Addr = cantFail(errorOrToExpected(readUnencodedNumber())); if (isSentinelProbe(Attr)) { // For sentinel probe, the addr field actually stores the GUID of the // split function. Convert it to the real address. @@ -508,10 +489,8 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( uint32_t Discriminator = 0; if (hasDiscriminator(Attr)) { - auto ErrorOrDiscriminator = readUnsignedNumber(); - if (!ErrorOrDiscriminator) - return false; - Discriminator = std::move(*ErrorOrDiscriminator); + Discriminator = + cantFail(errorOrToExpected(readUnsignedNumber())); } if (Cur && !isSentinelProbe(Attr)) { @@ -524,17 +503,109 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( LastAddr = Addr; } - uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); for (uint32_t I = 0; I < ChildrenToProcess; I++) { buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs); } + return true; +} + +template +bool MCPseudoProbeDecoder::countRecords(bool &Discard, uint32_t &ProbeCount, + uint32_t &InlinedCount, + const Uint64Set &GuidFilter) { + if (!IsTopLevelFunc) + // Read inline site for inlinees + if (!readUnsignedNumber()) + return false; + + // Read guid + auto ErrorOrCurGuid = readUnencodedNumber(); + if (!ErrorOrCurGuid) + return false; + uint64_t Guid = std::move(*ErrorOrCurGuid); + + // Decide if top-level node should be disgarded. + if (IsTopLevelFunc) { + Discard = !GuidFilter.empty() && !GuidFilter.count(Guid); + if (!Discard) + // Allocate an entry for top-level function record. + ++InlinedCount; + } + + // Read number of probes in the current node. 
+ auto ErrorOrNodeCount = readUnsignedNumber(); + if (!ErrorOrNodeCount) + return false; + uint32_t NodeCount = std::move(*ErrorOrNodeCount); + uint32_t CurrentProbeCount = 0; + + // Read number of direct inlinees + auto ErrorOrCurChildrenToProcess = readUnsignedNumber(); + if (!ErrorOrCurChildrenToProcess) + return false; + uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess); + + // Read all probes in this node + for (std::size_t I = 0; I < NodeCount; I++) { + // Read index + if (!readUnsignedNumber()) + return false; + + // Read type | flag. + auto ErrorOrValue = readUnencodedNumber(); + if (!ErrorOrValue) + return false; + uint8_t Value = std::move(*ErrorOrValue); + + uint8_t Attr = (Value & 0x70) >> 4; + if (Value & 0x80) { + // Offset + if (!readSignedNumber()) + return false; + } else { + // Addr + if (!readUnencodedNumber()) + return false; + } + + if (hasDiscriminator(Attr)) + // Discriminator + if (!readUnsignedNumber()) + return false; + + if (!Discard && !isSentinelProbe(Attr)) + ++CurrentProbeCount; + } + if (!Discard) { + ProbeCount += CurrentProbeCount; + InlinedCount += ChildrenToProcess; + } + + for (uint32_t I = 0; I < ChildrenToProcess; I++) + if (!countRecords(Discard, ProbeCount, InlinedCount, GuidFilter)) + return false; return true; } bool MCPseudoProbeDecoder::buildAddress2ProbeMap( const uint8_t *Start, std::size_t Size, const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { + // For function records in the order of their appearance in the encoded data + // (DFS), count the number of contained probes and inlined function records. + uint32_t ProbeCount = 0; + uint32_t InlinedCount = 0; + uint32_t TopLevelFuncs = 0; + Data = Start; + End = Data + Size; + bool Discard = false; + while (Data < End) { + if (!countRecords(Discard, ProbeCount, InlinedCount, GuidFilter)) + return false; + TopLevelFuncs += !Discard; + } + assert(Data == End && "Have unprocessed data in pseudo_probe section"); + Data = Start; End = Data + Size; uint64_t LastAddr = 0; From 04ebd1907c0561831e4fcf2658e1f3614f8cdd77 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 26 Aug 2024 09:09:13 -0700 Subject: [PATCH 36/65] [MC][NFC] Statically allocate storage for decoded pseudo probes and function records Use #102774 to allocate storage for decoded probes (`PseudoProbeVec`) and function records (`InlineTreeVec`). Leverage that to also shrink sizes of `MCDecodedPseudoProbe`: - Drop Guid since it's accessible via `InlineTree`. `MCDecodedPseudoProbeInlineTree`: - Keep track of probes and inlinees using `ArrayRef`s now that probes and function records belonging to the same function are allocated contiguously. This reduces peak RSS from 13.7 GiB to 9.7 GiB and pseudo probe parsing time (as part of perf2bolt) from 15.3s to 9.6s for a large binary with 400MiB .pseudo_probe section containing 43M probes and 25M function records. 
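A minimal sketch of the storage layout this moves toward (hypothetical `Node` and arena names, not the MC classes): records are appended contiguously into one pre-sized vector, and a parent only remembers a pointer-plus-count window into that vector instead of owning a per-node container. This stays valid only because the up-front counting pass guarantees the vector never reallocates:

```
#include <cstdint>
#include <cstdio>
#include <vector>

struct Node {
  uint64_t Guid = 0;
  Node *FirstChild = nullptr; // window into the shared arena ...
  uint32_t NumChildren = 0;   // ... rather than an owned map or vector
};

int main() {
  std::vector<Node> Arena;
  Arena.reserve(3); // capacity from a counting pass; must not be exceeded

  Arena.push_back(Node{1}); // top-level function record, Guid = 1
  Node &Root = Arena.front();

  // Children of a node are appended contiguously, then referenced as a
  // (pointer, length) pair.
  std::size_t First = Arena.size();
  Arena.push_back(Node{10});
  Arena.push_back(Node{11});
  Root.FirstChild = &Arena[First];
  Root.NumChildren = static_cast<uint32_t>(Arena.size() - First);

  for (uint32_t I = 0; I < Root.NumChildren; ++I)
    std::printf("child guid: %llu\n",
                static_cast<unsigned long long>(Root.FirstChild[I].Guid));
  return 0;
}
```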
Depends on: #102774 #102787 #102788 Reviewers: maksfb, rafaelauler, dcci, ayermolo, wlei-llvm Reviewed By: wlei-llvm Pull Request: https://github.com/llvm/llvm-project/pull/102789 --- bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 30 ++-- llvm/include/llvm/MC/MCPseudoProbe.h | 136 +++++++++++++------ llvm/lib/MC/MCPseudoProbe.cpp | 58 +++++--- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 6 +- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 +- 5 files changed, 164 insertions(+), 76 deletions(-) diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 37a5b937ebcaa3..9677530919b90d 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -200,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() { } unsigned ProbeTrack = AP.second.size(); - std::list::iterator Probe = AP.second.begin(); + auto Probe = llvm::map_iterator( + AP.second.begin(), + [](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); }); while (ProbeTrack != 0) { if (Probe->isBlock()) { Probe->setAddress(BlkOutputAddress); @@ -218,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() { } while (CallOutputAddress != CallOutputAddresses.second) { - AP.second.push_back(*Probe); - AP.second.back().setAddress(CallOutputAddress->second); - Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); + ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second); CallOutputAddress = std::next(CallOutputAddress); } } @@ -332,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() { ProbeDecoder.getDummyInlineRoot(); for (auto Child = Root.getChildren().begin(); Child != Root.getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (auto Inlinee : Inlinees) // INT64_MAX is "placeholder" of unused callsite index field in the pair @@ -358,25 +358,37 @@ void PseudoProbeRewriter::encodePseudoProbes() { EmitInt(Cur->Guid, 8); // Emit number of probes in this node uint64_t Deleted = 0; - for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(Cur->getProbes())) if (Probe->getAddress() == INT64_MAX) Deleted++; LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); - uint64_t ProbesSize = Cur->getProbes().size() - Deleted; + size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur); + uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes; EmitULEB128IntValue(ProbesSize); // Emit number of direct inlinees EmitULEB128IntValue(Cur->getChildren().size()); // Emit probes in this group - for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(Cur->getProbes())) { if (Probe->getAddress() == INT64_MAX) continue; EmitDecodedPseudoProbe(Probe); LastProbe = Probe; } + if (InjectedProbes) { + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(ProbeDecoder.getInjectedProbes(Cur))) { + if (Probe->getAddress() == INT64_MAX) + continue; + EmitDecodedPseudoProbe(Probe); + LastProbe = Probe; + } + } for (auto Child = Cur->getChildren().begin(); Child != Cur->getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (const auto &Inlinee : Inlinees) { assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid"); NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h 
b/llvm/include/llvm/MC/MCPseudoProbe.h index 3dd10c0717679b..66ad9db4860d8a 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -54,20 +54,21 @@ #ifndef LLVM_MC_MCPSEUDOPROBE_H #define LLVM_MC_MCPSEUDOPROBE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/Support/ErrorOr.h" -#include +#include #include #include #include #include #include #include -#include #include namespace llvm { @@ -103,14 +104,15 @@ using MCPseudoProbeInlineStack = SmallVector; using GUIDProbeFunctionMap = std::unordered_map; // Address to pseudo probes map. -using AddressProbesMap = std::map>; +using AddressProbesMap = + std::map>>; class MCDecodedPseudoProbeInlineTree; class MCPseudoProbeBase { protected: - uint64_t Guid; - uint64_t Index; + uint32_t Index; uint32_t Discriminator; uint8_t Attributes; uint8_t Type; @@ -120,14 +122,12 @@ class MCPseudoProbeBase { const static uint32_t PseudoProbeFirstId = 1; public: - MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D) - : Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {} + MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D) + : Index(I), Discriminator(D), Attributes(At), Type(T) {} bool isEntry() const { return Index == PseudoProbeFirstId; } - uint64_t getGuid() const { return Guid; } - - uint64_t getIndex() const { return Index; } + uint32_t getIndex() const { return Index; } uint32_t getDiscriminator() const { return Discriminator; } @@ -157,18 +157,20 @@ class MCPseudoProbeBase { /// uses an address from a temporary label created at the current address in the /// current section. class MCPseudoProbe : public MCPseudoProbeBase { + uint64_t Guid; MCSymbol *Label; public: MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attributes, uint32_t Discriminator) - : MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator), + : MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid), Label(Label) { assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); assert(Attributes <= 0xFF && "Probe attributes too big to encode, exceeding 2^16"); } + uint64_t getGuid() const { return Guid; }; MCSymbol *getLabel() const { return Label; } void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; }; @@ -181,11 +183,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { MCDecodedPseudoProbeInlineTree *InlineTree; public: - MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, - uint8_t At, uint32_t D, - MCDecodedPseudoProbeInlineTree *Tree) - : MCPseudoProbeBase(G, I, At, static_cast(K), D), Address(Ad), + MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At, + uint32_t D, MCDecodedPseudoProbeInlineTree *Tree) + : MCPseudoProbeBase(I, At, static_cast(K), D), Address(Ad), InlineTree(Tree){}; + uint64_t getGuid() const; uint64_t getAddress() const { return Address; } @@ -211,21 +213,14 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { bool ShowName) const; }; -template +template class MCPseudoProbeInlineTreeBase { - struct InlineSiteHash { - uint64_t operator()(const InlineSite &Site) const { - return std::get<0>(Site) ^ std::get<1>(Site); - } - }; - protected: // Track children (e.g. 
inlinees) of current context - using InlinedProbeTreeMap = std::unordered_map< - InlineSite, std::unique_ptr, InlineSiteHash>; InlinedProbeTreeMap Children; // Set of probes that come with the function. - std::vector Probes; + ProbesType Probes; MCPseudoProbeInlineTreeBase() { static_assert(std::is_base_of::value, @@ -240,12 +235,10 @@ class MCPseudoProbeInlineTreeBase { bool isRoot() const { return Guid == 0; } InlinedProbeTreeMap &getChildren() { return Children; } const InlinedProbeTreeMap &getChildren() const { return Children; } - std::vector &getProbes() { return Probes; } - const std::vector &getProbes() const { return Probes; } - void addProbes(ProbeType Probe) { Probes.push_back(Probe); } + const ProbesType &getProbes() const { return Probes; } // Caller node of the inline site - MCPseudoProbeInlineTreeBase *Parent = - nullptr; + MCPseudoProbeInlineTreeBase *Parent = nullptr; DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { auto Ret = Children.emplace( Site, std::make_unique(Site)); @@ -259,9 +252,17 @@ class MCPseudoProbeInlineTreeBase { // instance is created as the root of a tree. // A real instance of this class is created for each function, either a // not inlined function that has code in .text section or an inlined function. +struct InlineSiteHash { + uint64_t operator()(const InlineSite &Site) const { + return std::get<0>(Site) ^ std::get<1>(Site); + } +}; class MCPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase { + : public MCPseudoProbeInlineTreeBase< + std::vector, MCPseudoProbeInlineTree, + std::unordered_map, + InlineSiteHash>> { public: MCPseudoProbeInlineTree() = default; MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } @@ -277,16 +278,31 @@ class MCPseudoProbeInlineTree // inline tree node for the decoded pseudo probe class MCDecodedPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase { -public: - InlineSite ISite; + : public MCPseudoProbeInlineTreeBase< + MCDecodedPseudoProbe *, MCDecodedPseudoProbeInlineTree, + MutableArrayRef> { + uint32_t NumProbes = 0; + uint32_t ProbeId = 0; +public: MCDecodedPseudoProbeInlineTree() = default; - MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; + MCDecodedPseudoProbeInlineTree(const InlineSite &Site, + MCDecodedPseudoProbeInlineTree *Parent) + : ProbeId(std::get<1>(Site)) { + this->Guid = std::get<0>(Site); + this->Parent = Parent; + } // Return false if it's a dummy inline site bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } + InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); } + void setProbes(MutableArrayRef ProbesRef) { + Probes = ProbesRef.data(); + NumProbes = ProbesRef.size(); + } + auto getProbes() const { + return MutableArrayRef(Probes, NumProbes); + } }; /// Instances of this class represent the pseudo probes inserted into a compile @@ -336,6 +352,20 @@ class MCPseudoProbeTable { }; class MCPseudoProbeDecoder { + // Decoded pseudo probes vector. + std::vector PseudoProbeVec; + // Injected pseudo probes, identified by the containing inline tree node. + // Need to keep injected probes separately for two reasons: + // 1) Probes cannot be added to the PseudoProbeVec: appending may cause + // reallocation so that pointers to its elements will become invalid. + // 2) Probes belonging to function record must be contiguous in PseudoProbeVec + // as owning InlineTree references them with an ArrayRef to save space. + std::unordered_map> + InjectedProbeMap; + // Decoded inline records vector. 
+ std::vector InlineTreeVec; + // GUID to PseudoProbeFuncDesc map. GUIDProbeFunctionMap GUID2FuncDescMap; @@ -382,10 +412,6 @@ class MCPseudoProbeDecoder { const Uint64Set &GuildFilter, const Uint64Map &FuncStartAddrs); - bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, - uint64_t &LastAddr, const Uint64Set &GuildFilter, - const Uint64Map &FuncStartAddrs); - // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); @@ -428,6 +454,34 @@ class MCPseudoProbeDecoder { const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { return DummyInlineRoot; } + + void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) { + const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode(); + InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address); + } + + size_t + getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const { + auto It = InjectedProbeMap.find(Parent); + if (It == InjectedProbeMap.end()) + return 0; + return It->second.size(); + } + + auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) { + auto It = InjectedProbeMap.find(Parent); + assert(It != InjectedProbeMap.end()); + return iterator_range(It->second); + } + +private: + // Recursively parse an inlining tree encoded in pseudo_probe section. Returns + // whether the the top-level node should be skipped. + template + bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + uint64_t &LastAddr, const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs, + const uint32_t CurChildIndex); }; } // end namespace llvm diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 77ac1fee4120f5..1031dac331bb1c 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -49,6 +49,8 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer *MCOS, const MCSymbol *A, return AddrDelta; } +uint64_t MCDecodedPseudoProbe::getGuid() const { return InlineTree->Guid; } + void MCPseudoProbe::emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const { bool IsSentinel = isSentinelProbe(getAttributes()); @@ -289,8 +291,8 @@ void MCDecodedPseudoProbe::getInlineContext( // Note that it won't include the probe's belonging function(leaf location) while (Cur->hasInlineSite()) { StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid); - ContextStack.emplace_back( - MCPseudoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite))); + ContextStack.emplace_back(MCPseudoProbeFrameLocation( + FuncName, std::get<1>(Cur->getInlineSite()))); Cur = static_cast(Cur->Parent); } // Make the ContextStack in caller-callee order @@ -318,10 +320,10 @@ void MCDecodedPseudoProbe::print(raw_ostream &OS, bool ShowName) const { OS << "FUNC: "; if (ShowName) { - StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Guid); + StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, getGuid()); OS << FuncName.str() << " "; } else { - OS << Guid << " "; + OS << getGuid() << " "; } OS << "Index: " << Index << " "; if (Discriminator) @@ -417,17 +419,18 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, return true; } +template bool MCPseudoProbeDecoder::buildAddress2ProbeMap( MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, - const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { + const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs, + const uint32_t CurChildIndex) { // The pseudo_probe section encodes an inline forest and each tree has a // format defined in 
MCPseudoProbe.h uint32_t Index = 0; - bool IsTopLevelFunc = Cur == &DummyInlineRoot; if (IsTopLevelFunc) { // Use a sequential id for top level inliner. - Index = Cur->getChildren().size(); + Index = CurChildIndex; } else { // Read inline site for inlinees Index = cantFail(errorOrToExpected(readUnsignedNumber())); @@ -443,8 +446,9 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( // If the incoming node is null, all its children nodes should be disgarded. if (Cur) { // Switch/add to a new tree node(inlinee) - Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index)); - Cur->Guid = Guid; + Cur->getChildren()[CurChildIndex] = + MCDecodedPseudoProbeInlineTree(InlineSite(Guid, Index), Cur); + Cur = &Cur->getChildren()[CurChildIndex]; if (IsTopLevelFunc && !EncodingIsAddrBased) { if (auto V = FuncStartAddrs.lookup(Guid)) LastAddr = V; @@ -454,6 +458,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( // Read number of probes in the current node. uint32_t NodeCount = cantFail(errorOrToExpected(readUnsignedNumber())); + uint32_t CurrentProbeCount = 0; // Read number of direct inlinees uint32_t ChildrenToProcess = cantFail(errorOrToExpected(readUnsignedNumber())); @@ -494,19 +499,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( } if (Cur && !isSentinelProbe(Attr)) { - // Populate Address2ProbesMap - auto &Probes = Address2ProbesMap[Addr]; - Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, - Discriminator, Cur); - Cur->addProbes(&Probes.back()); + PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr, + Discriminator, Cur); + Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back()); + ++CurrentProbeCount; } LastAddr = Addr; } + if (Cur) { + Cur->setProbes( + MutableArrayRef(PseudoProbeVec).take_back(CurrentProbeCount)); + InlineTreeVec.resize(InlineTreeVec.size() + ChildrenToProcess); + Cur->getChildren() = + MutableArrayRef(InlineTreeVec).take_back(ChildrenToProcess); + } for (uint32_t I = 0; I < ChildrenToProcess; I++) { - buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs); + buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs, I); } - return true; + return Cur; } template @@ -605,14 +616,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( TopLevelFuncs += !Discard; } assert(Data == End && "Have unprocessed data in pseudo_probe section"); + PseudoProbeVec.reserve(ProbeCount); + InlineTreeVec.reserve(InlinedCount); + + // Allocate top-level function records as children of DummyInlineRoot. 
+ InlineTreeVec.resize(TopLevelFuncs); + DummyInlineRoot.getChildren() = MutableArrayRef(InlineTreeVec); Data = Start; End = Data + Size; uint64_t LastAddr = 0; + uint32_t CurChildIndex = 0; while (Data < End) - buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter, - FuncStartAddrs); + CurChildIndex += buildAddress2ProbeMap( + &DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex); assert(Data == End && "Have unprocessed data in pseudo_probe section"); + assert(PseudoProbeVec.size() == ProbeCount && + "Mismatching probe count pre- and post-parsing"); + assert(InlineTreeVec.size() == InlinedCount && + "Mismatching function records count pre- and post-parsing"); return true; } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 5094871a1d415d..ea7b9b9c7bd528 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -1293,9 +1293,9 @@ void CSProfileGenerator::populateBodySamplesWithProbes( // and will be inferred by the compiler. for (auto &I : FrameSamples) { for (auto *FunctionProfile : I.second) { - for (auto *Probe : I.first->getProbes()) { - FunctionProfile->addBodySamples(Probe->getIndex(), - Probe->getDiscriminator(), 0); + for (const MCDecodedPseudoProbe &Probe : I.first->getProbes()) { + FunctionProfile->addBodySamples(Probe.getIndex(), + Probe.getDiscriminator(), 0); } } } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index a458ffcb96b41a..e4fc3816cd0c45 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -132,7 +132,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( MCPseudoProbeDecoder &ProbeDecoder) { ProbeFrameStack ProbeContext; for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) - trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext); } void BinarySizeContextTracker::trackInlineesOptimizedAway( @@ -160,9 +160,9 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( // DFS down the probe inline tree for (const auto &ChildNode : ProbeNode.getChildren()) { - InlineSite Location = ChildNode.first; + InlineSite Location = ChildNode.getInlineSite(); ProbeContext.back().second = std::get<1>(Location); - trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext); } ProbeContext.pop_back(); @@ -454,8 +454,8 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe // is available if (TrackFuncContextSize) { - for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { - auto *Frame = Child.second.get(); + for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { + auto *Frame = &Child; StringRef FuncName = ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName; TopLevelProbeFrameMap[FuncName] = Frame; From ee09f7d1fc173f2b495838c925f2cf39a2b55369 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 26 Aug 2024 09:14:35 -0700 Subject: [PATCH 37/65] [MC][NFC] Reduce Address2ProbesMap size Replace the map from addresses to list of probes with a flat vector containing probe references sorted by their addresses. 
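For the lookup side of that flat layout, an illustrative standalone sketch (hypothetical `Probe`/`findRange` names, not the MC API): keep the vector sorted by address once, then answer half-open range queries with lower_bound:

```
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

struct Probe {
  uint64_t Address;
  uint32_t Index;
};

// Returns the half-open range [From, To) over a vector already sorted by
// Address.
static std::pair<std::vector<Probe>::const_iterator,
                 std::vector<Probe>::const_iterator>
findRange(const std::vector<Probe> &Probes, uint64_t From, uint64_t To) {
  auto Less = [](const Probe &P, uint64_t Addr) { return P.Address < Addr; };
  return {std::lower_bound(Probes.begin(), Probes.end(), From, Less),
          std::lower_bound(Probes.begin(), Probes.end(), To, Less)};
}

int main() {
  std::vector<Probe> Probes = {{0x10, 1}, {0x10, 2}, {0x20, 3}, {0x30, 4}};
  auto [Begin, End] = findRange(Probes, 0x10, 0x21); // probes 1, 2 and 3
  for (auto It = Begin; It != End; ++It)
    std::printf("probe %u at 0x%llx\n", It->Index,
                static_cast<unsigned long long>(It->Address));
  return 0;
}
```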
Reduces pseudo probe parsing time from 9.56s to 8.59s and peak RSS from 9.66 GiB to 9.08 GiB as part of perf2bolt processing a large binary. Test Plan: ``` bin/llvm-lit -sv test/tools/llvm-profgen ``` Reviewers: maksfb, rafaelauler, dcci, ayermolo, wlei-llvm Reviewed By: wlei-llvm Pull Request: https://github.com/llvm/llvm-project/pull/102904 --- bolt/lib/Profile/DataAggregator.cpp | 14 ++-- bolt/lib/Profile/YAMLProfileWriter.cpp | 11 +-- bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 83 ++++++++------------ llvm/include/llvm/MC/MCPseudoProbe.h | 30 +++++-- llvm/lib/MC/MCPseudoProbe.cpp | 43 +++++----- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 8 +- 6 files changed, 94 insertions(+), 95 deletions(-) diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a300e5b2b1dabd..813d825f8b570c 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -2415,17 +2415,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, Fragments.insert(BF); for (const BinaryFunction *F : Fragments) { const uint64_t FuncAddr = F->getAddress(); - const auto &FragmentProbes = - llvm::make_range(ProbeMap.lower_bound(FuncAddr), - ProbeMap.lower_bound(FuncAddr + F->getSize())); - for (const auto &[OutputAddress, Probes] : FragmentProbes) { + for (const MCDecodedPseudoProbe &Probe : + ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) { + const uint32_t OutputAddress = Probe.getAddress(); const uint32_t InputOffset = BAT->translate( FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); const unsigned BlockIndex = getBlock(InputOffset).second; - for (const MCDecodedPseudoProbe &Probe : Probes) - YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back( - yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(), - Probe.getType()}); + YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back( + yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(), + Probe.getType()}); } } } diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 84777741d611a3..f74cf60e076d0a 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -193,13 +193,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, const uint64_t FuncAddr = BF.getAddress(); const std::pair &BlockRange = BB->getInputAddressRange(); - const auto &BlockProbes = - llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first), - ProbeMap.lower_bound(FuncAddr + BlockRange.second)); - for (const auto &[_, Probes] : BlockProbes) - for (const MCDecodedPseudoProbe &Probe : Probes) - YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{ - Probe.getGuid(), Probe.getIndex(), Probe.getType()}); + for (const MCDecodedPseudoProbe &Probe : ProbeMap.find( + FuncAddr + BlockRange.first, FuncAddr + BlockRange.second)) + YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{ + Probe.getGuid(), Probe.getIndex(), Probe.getType()}); } YamlBF.Blocks.emplace_back(YamlBB); diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 9677530919b90d..7516918b2389fc 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -173,13 +173,13 @@ void PseudoProbeRewriter::updatePseudoProbes() { AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap(); const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap(); - for (auto &AP : Address2ProbesMap) { - BinaryFunction *F = 
BC.getBinaryFunctionContainingAddress(AP.first); + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + uint64_t Address = Probe.getAddress(); + BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address); // If F is removed, eliminate all probes inside it from inline tree // Setting probes' addresses as INT64_MAX means elimination if (!F) { - for (MCDecodedPseudoProbe &Probe : AP.second) - Probe.setAddress(INT64_MAX); + Probe.setAddress(INT64_MAX); continue; } // If F is not emitted, the function will remain in the same address as its @@ -187,45 +187,36 @@ void PseudoProbeRewriter::updatePseudoProbes() { if (!F->isEmitted()) continue; - uint64_t Offset = AP.first - F->getAddress(); + uint64_t Offset = Address - F->getAddress(); const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; // Check if block output address is defined. // If not, such block is removed from binary. Then remove the probes from // inline tree if (BlkOutputAddress == 0) { - for (MCDecodedPseudoProbe &Probe : AP.second) - Probe.setAddress(INT64_MAX); + Probe.setAddress(INT64_MAX); continue; } - unsigned ProbeTrack = AP.second.size(); - auto Probe = llvm::map_iterator( - AP.second.begin(), - [](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); }); - while (ProbeTrack != 0) { - if (Probe->isBlock()) { - Probe->setAddress(BlkOutputAddress); - } else if (Probe->isCall()) { - // A call probe may be duplicated due to ICP - // Go through output of InputOffsetToAddressMap to collect all related - // probes - auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first); - auto CallOutputAddress = CallOutputAddresses.first; - if (CallOutputAddress == CallOutputAddresses.second) { - Probe->setAddress(INT64_MAX); - } else { - Probe->setAddress(CallOutputAddress->second); - CallOutputAddress = std::next(CallOutputAddress); - } - - while (CallOutputAddress != CallOutputAddresses.second) { - ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second); - CallOutputAddress = std::next(CallOutputAddress); - } + if (Probe.isBlock()) { + Probe.setAddress(BlkOutputAddress); + } else if (Probe.isCall()) { + // A call probe may be duplicated due to ICP + // Go through output of InputOffsetToAddressMap to collect all related + // probes + auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(Address); + auto CallOutputAddress = CallOutputAddresses.first; + if (CallOutputAddress == CallOutputAddresses.second) { + Probe.setAddress(INT64_MAX); + } else { + Probe.setAddress(CallOutputAddress->second); + CallOutputAddress = std::next(CallOutputAddress); + } + + while (CallOutputAddress != CallOutputAddresses.second) { + ProbeDecoder.addInjectedProbe(Probe, CallOutputAddress->second); + CallOutputAddress = std::next(CallOutputAddress); } - Probe = std::next(Probe); - ProbeTrack--; } } @@ -241,22 +232,16 @@ void PseudoProbeRewriter::updatePseudoProbes() { BinaryBlock.getName(); // scan all addresses -> correlate probe to block when print out - std::vector Addresses; - for (auto &Entry : Address2ProbesMap) - Addresses.push_back(Entry.first); - llvm::sort(Addresses); - for (uint64_t Key : Addresses) { - for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { - if (Probe.getAddress() == INT64_MAX) - outs() << "Deleted Probe: "; - else - outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; - Probe.print(outs(), GUID2Func, true); - // print block name only if the probe is block type and undeleted. 
- if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) - outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " - << Addr2BlockNames[Probe.getAddress()] << "\n"; - } + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + if (Probe.getAddress() == INT64_MAX) + outs() << "Deleted Probe: "; + else + outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " "; + Probe.print(outs(), GUID2Func, true); + // print block name only if the probe is block type and undeleted. + if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) + outs() << format_hex(Probe.getAddress(), 8) << " Probe is in " + << Addr2BlockNames[Probe.getAddress()] << "\n"; } outs() << "=======================================\n"; } diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 66ad9db4860d8a..854f1209c39346 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -63,7 +63,6 @@ #include "llvm/IR/PseudoProbe.h" #include "llvm/Support/ErrorOr.h" #include -#include #include #include #include @@ -103,10 +102,6 @@ using MCPseudoProbeInlineStack = SmallVector; // GUID to PseudoProbeFuncDesc map using GUIDProbeFunctionMap = std::unordered_map; -// Address to pseudo probes map. -using AddressProbesMap = - std::map>>; class MCDecodedPseudoProbeInlineTree; @@ -213,6 +208,31 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { bool ShowName) const; }; +// Address to pseudo probes map. +class AddressProbesMap + : public std::vector> { + auto getIt(uint64_t Addr) const { + auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) { + return Probe.getAddress() < Addr; + }; + return llvm::lower_bound(*this, Addr, CompareProbe); + } + +public: + // Returns range of probes within [\p From, \p To) address range. + auto find(uint64_t From, uint64_t To) const { + return llvm::make_range(getIt(From), getIt(To)); + } + // Returns range of probes with given \p Address. 
+ auto find(uint64_t Address) const { + auto FromIt = getIt(Address); + if (FromIt == end() || FromIt->get().getAddress() != Address) + return llvm::make_range(end(), end()); + auto ToIt = getIt(Address + 1); + return llvm::make_range(FromIt, ToIt); + } +}; + template class MCPseudoProbeInlineTreeBase { diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 1031dac331bb1c..5951499c0cb280 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -501,7 +501,6 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( if (Cur && !isSentinelProbe(Attr)) { PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr, Discriminator, Cur); - Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back()); ++CurrentProbeCount; } LastAddr = Addr; @@ -635,6 +634,15 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( "Mismatching probe count pre- and post-parsing"); assert(InlineTreeVec.size() == InlinedCount && "Mismatching function records count pre- and post-parsing"); + + std::vector> SortedA2P(ProbeCount); + for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec)) + SortedA2P[I] = {Probe.getAddress(), I}; + llvm::sort(SortedA2P); + Address2ProbesMap.reserve(ProbeCount); + for (const uint32_t I : llvm::make_second_range(SortedA2P)) + Address2ProbesMap.emplace_back(PseudoProbeVec[I]); + SortedA2P.clear(); return true; } @@ -650,36 +658,29 @@ void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) { void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS, uint64_t Address) { - auto It = Address2ProbesMap.find(Address); - if (It != Address2ProbesMap.end()) { - for (const MCDecodedPseudoProbe &Probe : It->second) { - OS << " [Probe]:\t"; - Probe.print(OS, GUID2FuncDescMap, true); - } + for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) { + OS << " [Probe]:\t"; + Probe.print(OS, GUID2FuncDescMap, true); } } void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) { - auto Entries = make_first_range(Address2ProbesMap); - SmallVector Addresses(Entries.begin(), Entries.end()); - llvm::sort(Addresses); - for (auto K : Addresses) { - OS << "Address:\t"; - OS << K; - OS << "\n"; - printProbeForAddress(OS, K); + uint64_t PrevAddress = INT64_MAX; + for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { + uint64_t Address = Probe.getAddress(); + if (Address != PrevAddress) { + PrevAddress = Address; + OS << "Address:\t" << Address << '\n'; + } + OS << " [Probe]:\t"; + Probe.print(OS, GUID2FuncDescMap, true); } } const MCDecodedPseudoProbe * MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const { - auto It = Address2ProbesMap.find(Address); - if (It == Address2ProbesMap.end()) - return nullptr; - const auto &Probes = It->second; - const MCDecodedPseudoProbe *CallProbe = nullptr; - for (const MCDecodedPseudoProbe &Probe : Probes) { + for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) { if (Probe.isCall()) { // Disabling the assert and returning first call probe seen so far. // Subsequent call probes, if any, are ignored. 
Due to the the way diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index ea7b9b9c7bd528..b47c77c5f2ff3f 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -1183,11 +1183,9 @@ void ProfileGeneratorBase::extractProbesFromRange( do { const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap(); - auto It = Address2ProbesMap.find(IP.Address); - if (It != Address2ProbesMap.end()) { - for (const MCDecodedPseudoProbe &Probe : It->second) { - ProbeCounter[&Probe] += Count; - } + for (const MCDecodedPseudoProbe &Probe : + Address2ProbesMap.find(IP.Address)) { + ProbeCounter[&Probe] += Count; } } while (IP.advance() && IP.Address <= RangeEnd); } From a79cf0228e4ade1f69e8196fdcf9c0184b4ba1b7 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 26 Aug 2024 09:15:53 -0700 Subject: [PATCH 38/65] [MC][NFC] Use vector for GUIDProbeFunctionMap Replace unordered_map with a vector. Pre-parse the section to statically allocate storage. Use BumpPtrAllocator for FuncName strings, keep StringRef in FuncDesc. Reduces peak RSS of pseudo probe parsing from 9.08 GiB to 8.89 GiB as part of perf2bolt with a large binary. Test Plan: ``` bin/llvm-lit -sv test/tools/llvm-profgen ``` Reviewers: wlei-llvm, rafaelauler, dcci, maksfb, ayermolo Reviewed By: wlei-llvm Pull Request: https://github.com/llvm/llvm-project/pull/102905 --- bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 3 +- llvm/include/llvm/MC/MCPseudoProbe.h | 19 +++++++-- llvm/lib/MC/MCPseudoProbe.cpp | 52 ++++++++++++++---------- 3 files changed, 49 insertions(+), 25 deletions(-) diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 7516918b2389fc..4925b4b385d9b1 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -155,7 +155,8 @@ void PseudoProbeRewriter::parsePseudoProbe() { ProbeDecoder.printProbesForAllAddresses(outs()); } - for (const auto &[GUID, FuncDesc] : ProbeDecoder.getGUID2FuncDescMap()) { + for (const auto &FuncDesc : ProbeDecoder.getGUID2FuncDescMap()) { + uint64_t GUID = FuncDesc.FuncGUID; if (!FuncStartAddrs.contains(GUID)) continue; BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]); diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 854f1209c39346..32905c1e9a424a 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -61,6 +61,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/IR/PseudoProbe.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorOr.h" #include #include @@ -86,7 +87,7 @@ enum class MCPseudoProbeFlag { struct MCPseudoProbeFuncDesc { uint64_t FuncGUID = 0; uint64_t FuncHash = 0; - std::string FuncName; + StringRef FuncName; MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; @@ -100,8 +101,18 @@ class MCDecodedPseudoProbe; using InlineSite = std::tuple; using MCPseudoProbeInlineStack = SmallVector; // GUID to PseudoProbeFuncDesc map -using GUIDProbeFunctionMap = - std::unordered_map; +class GUIDProbeFunctionMap : public std::vector { +public: + auto find(uint64_t GUID) const { + auto CompareDesc = [](const MCPseudoProbeFuncDesc &Desc, uint64_t GUID) { + return Desc.FuncGUID < GUID; + }; + auto It = llvm::lower_bound(*this, GUID, CompareDesc); + if (It->FuncGUID != GUID) + return end(); + 
return It; + } +}; class MCDecodedPseudoProbeInlineTree; @@ -389,6 +400,8 @@ class MCPseudoProbeDecoder { // GUID to PseudoProbeFuncDesc map. GUIDProbeFunctionMap GUID2FuncDescMap; + BumpPtrAllocator FuncNameAllocator; + // Address to probes map. AddressProbesMap Address2ProbesMap; diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 5951499c0cb280..90d7588407068a 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -274,7 +274,7 @@ static StringRef getProbeFNameForGUID(const GUIDProbeFunctionMap &GUID2FuncMAP, auto It = GUID2FuncMAP.find(GUID); assert(It != GUID2FuncMAP.end() && "Probe function must exist for a valid GUID"); - return It->second.FuncName; + return It->FuncName; } void MCPseudoProbeFuncDesc::print(raw_ostream &OS) { @@ -390,32 +390,46 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, Data = Start; End = Data + Size; + uint32_t FuncDescCount = 0; while (Data < End) { - auto ErrorOrGUID = readUnencodedNumber(); - if (!ErrorOrGUID) + // GUID + if (!readUnencodedNumber()) return false; - - auto ErrorOrHash = readUnencodedNumber(); - if (!ErrorOrHash) + // Hash + if (!readUnencodedNumber()) return false; auto ErrorOrNameSize = readUnsignedNumber(); if (!ErrorOrNameSize) return false; - uint32_t NameSize = std::move(*ErrorOrNameSize); - - auto ErrorOrName = readString(NameSize); - if (!ErrorOrName) + // Function name + if (!readString(*ErrorOrNameSize)) return false; + ++FuncDescCount; + } + assert(Data == End && "Have unprocessed data in pseudo_probe_desc section"); + GUID2FuncDescMap.reserve(FuncDescCount); - uint64_t GUID = std::move(*ErrorOrGUID); - uint64_t Hash = std::move(*ErrorOrHash); - StringRef Name = std::move(*ErrorOrName); + Data = Start; + End = Data + Size; + while (Data < End) { + uint64_t GUID = + cantFail(errorOrToExpected(readUnencodedNumber())); + uint64_t Hash = + cantFail(errorOrToExpected(readUnencodedNumber())); + uint32_t NameSize = + cantFail(errorOrToExpected(readUnsignedNumber())); + StringRef Name = cantFail(errorOrToExpected(readString(NameSize))); // Initialize PseudoProbeFuncDesc and populate it into GUID2FuncDescMap - GUID2FuncDescMap.emplace(GUID, MCPseudoProbeFuncDesc(GUID, Hash, Name)); + GUID2FuncDescMap.emplace_back(GUID, Hash, Name.copy(FuncNameAllocator)); } assert(Data == End && "Have unprocessed data in pseudo_probe_desc section"); + assert(GUID2FuncDescMap.size() == FuncDescCount && + "Mismatching function description count pre- and post-parsing"); + llvm::sort(GUID2FuncDescMap, [](const auto &LHS, const auto &RHS) { + return LHS.FuncGUID < RHS.FuncGUID; + }); return true; } @@ -648,12 +662,8 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) { OS << "Pseudo Probe Desc:\n"; - // Make the output deterministic - std::map OrderedMap(GUID2FuncDescMap.begin(), - GUID2FuncDescMap.end()); - for (auto &I : OrderedMap) { - I.second.print(OS); - } + for (auto &I : GUID2FuncDescMap) + I.print(OS); } void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS, @@ -705,7 +715,7 @@ const MCPseudoProbeFuncDesc * MCPseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const { auto It = GUID2FuncDescMap.find(GUID); assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist"); - return &It->second; + return &*It; } void MCPseudoProbeDecoder::getInlineContextForProbe( From c1b3ebba7909e9e3e99a4ac45bef38d7f590cc3b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 26 Aug 2024 09:37:49 
-0700 Subject: [PATCH 39/65] [MC] Update MCOperand::getReg/setReg/createReg and MCInstBuilder::addReg to use MCRegister. (#106015) Replace unsigned with MCRegister. Update some ternary operators that started giving errors. --- llvm/include/llvm/MC/MCInst.h | 11 ++++++----- llvm/include/llvm/MC/MCInstBuilder.h | 2 +- llvm/lib/MCA/InstrBuilder.cpp | 2 +- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4 ++-- .../Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 2 +- .../Target/LoongArch/AsmParser/LoongArchAsmParser.cpp | 6 +++--- llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 6 +++--- 7 files changed, 17 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/MC/MCInst.h b/llvm/include/llvm/MC/MCInst.h index 578b7328970b76..b3d615b4392f55 100644 --- a/llvm/include/llvm/MC/MCInst.h +++ b/llvm/include/llvm/MC/MCInst.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/bit.h" +#include "llvm/MC/MCRegister.h" #include "llvm/Support/SMLoc.h" #include #include @@ -66,15 +67,15 @@ class MCOperand { bool isInst() const { return Kind == kInst; } /// Returns the register number. - unsigned getReg() const { + MCRegister getReg() const { assert(isReg() && "This is not a register operand!"); return RegVal; } /// Set the register number. - void setReg(unsigned Reg) { + void setReg(MCRegister Reg) { assert(isReg() && "This is not a register operand!"); - RegVal = Reg; + RegVal = Reg.id(); } int64_t getImm() const { @@ -131,10 +132,10 @@ class MCOperand { InstVal = Val; } - static MCOperand createReg(unsigned Reg) { + static MCOperand createReg(MCRegister Reg) { MCOperand Op; Op.Kind = kRegister; - Op.RegVal = Reg; + Op.RegVal = Reg.id(); return Op; } diff --git a/llvm/include/llvm/MC/MCInstBuilder.h b/llvm/include/llvm/MC/MCInstBuilder.h index d06ed4c6c840a9..de45ffb4b2dc7c 100644 --- a/llvm/include/llvm/MC/MCInstBuilder.h +++ b/llvm/include/llvm/MC/MCInstBuilder.h @@ -34,7 +34,7 @@ class MCInstBuilder { } /// Add a new register operand. - MCInstBuilder &addReg(unsigned Reg) { + MCInstBuilder &addReg(MCRegister Reg) { Inst.addOperand(MCOperand::createReg(Reg)); return *this; } diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 32b20d758ee70b..c4d88856abdfb9 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -799,7 +799,7 @@ InstrBuilder::createInstruction(const MCInst &MCI, unsigned WriteIndex = 0; Idx = 0U; for (const WriteDescriptor &WD : D.Writes) { - RegID = WD.isImplicitWrite() ? WD.RegisterID + RegID = WD.isImplicitWrite() ? MCRegister(WD.RegisterID) : MCI.getOperand(WD.OpIndex).getReg(); // Check if this is a optional definition that references NoReg or a write // to a constant register. diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 1a10206eea2374..3914f36338fa50 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3815,7 +3815,7 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints( const MCOperand &Opr = Inst.getOperand(OperandIdx); return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) ? Opr.getReg() - : MCRegister::NoRegister; + : MCRegister(); }; // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. 
@@ -4753,7 +4753,7 @@ static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, if (!Op.isReg()) return -1; - unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); + MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); auto Reg = Sub ? Sub : Op.getReg(); const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); return AGPR32.contains(Reg) ? 1 : 0; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 1a0dc7098347ac..b1da9da19c69b1 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -382,7 +382,7 @@ static bool IsAGPROperand(const MCInst &Inst, int OpIdx, if (!Op.isReg()) return false; - unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); + MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); auto Reg = Sub ? Sub : Op.getReg(); return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255; } diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index f52e188f877920..c2ae4a0734b6a7 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -1314,8 +1314,8 @@ void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, // expands to: // pcaddu18i $rj, %call36(sym) // jirl $r0, $rj, 0 - unsigned ScratchReg = - IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; + MCRegister ScratchReg = + IsTailCall ? Inst.getOperand(0).getReg() : MCRegister(LoongArch::R1); const MCExpr *Sym = IsTailCall ? Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); const LoongArchMCExpr *LE = LoongArchMCExpr::create( @@ -1326,7 +1326,7 @@ void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, getSTI()); Out.emitInstruction( MCInstBuilder(LoongArch::JIRL) - .addReg(IsTailCall ? (unsigned)LoongArch::R0 : ScratchReg) + .addReg(IsTailCall ? MCRegister(LoongArch::R0) : ScratchReg) .addReg(ScratchReg) .addImm(0), getSTI()); diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 076e0a20cb97e9..c50c2063ee8edf 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -5782,9 +5782,9 @@ bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, sel = 3; break; } - unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd; - unsigned Op1 = - IsMFTR ? rd + MCRegister Op0 = IsMFTR ? Inst.getOperand(0).getReg() : MCRegister(rd); + MCRegister Op1 = + IsMFTR ? MCRegister(rd) : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg() : Inst.getOperand(0).getReg()); From 625e929d4305987a85c86ad4c67f1e15b36f89e0 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 26 Aug 2024 09:27:58 -0700 Subject: [PATCH 40/65] [SLP][NFC]Add a test with incorrect reduced gather node with extra use in cmp node, NFC. 
--- .../SLPVectorizer/X86/gather-with-cmp-user.ll | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll new file mode 100644 index 00000000000000..10ca5a2700ebee --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i1 @test(i32 %g, i16 %d) { +; CHECK-LABEL: define i1 @test( +; CHECK-SAME: i32 [[G:%.*]], i16 [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[D]], 1 +; CHECK-NEXT: [[XOR_I_I:%.*]] = xor i32 [[G]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[G]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[XOR_I_I]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> [[TMP2]], i8 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i8> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP12]]) +; CHECK-NEXT: ret i1 [[TMP13]] +; +entry: + %0 = and i16 %d, 1 + %xor.i.i = xor i32 %g, 1 + %conv1.i.i = trunc i32 %xor.i.i to i8 + %notsub.i = add i8 %conv1.i.i, -1 + %cmp.i.i = icmp sgt i8 %notsub.i, -3 + %conv3.i.i = zext i1 %cmp.i.i to i32 + %cmp4.i.i = icmp sgt i32 %xor.i.i, %conv3.i.i + %conv1.1.i.i = trunc i32 %g to i8 + %notsub25.i = add i8 %conv1.1.i.i, -1 + %cmp.1.i.i = icmp sgt i8 %notsub25.i, -3 + %conv3.1.i.i = zext i1 %cmp.1.i.i to i32 + %cmp4.1.i.i = icmp sgt i32 %g, %conv3.1.i.i + %notsub26.i = add i8 %conv1.1.i.i, -9 + %cmp.i17.i = icmp sgt i8 %notsub26.i, -3 + %conv3.i18.i = zext i1 %cmp.i17.i to i32 + %cmp4.i19.i = icmp sgt i32 %g, %conv3.i18.i + %notsub27.i = add i8 %conv1.i.i, -9 + %cmp.1.i22.i = icmp sgt i8 %notsub27.i, -3 + %conv3.1.i23.i = zext i1 %cmp.1.i22.i to i32 + %cmp4.1.i24.i = icmp sgt i32 %xor.i.i, %conv3.1.i23.i + %1 = and i1 %cmp4.i19.i, %cmp4.1.i24.i + %2 = and i1 %cmp4.i.i, %1 + %3 = and i1 %cmp4.1.i.i, %2 + ret i1 %3 +} From 2e426fe8ff314c2565073e73e27fdbdf36c140a3 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Mon, 26 Aug 2024 09:43:03 -0700 Subject: [PATCH 41/65] Add unit tests for size returning new funcs in the MemProf use pass. (#105473) We use a unit test to verify correctness since: a) we don't have a text format profile b) size returning new isn't supported natively c) a raw profile will need to be manipulated artificially The changes this test covers were made in https://github.com/llvm/llvm-project/pull/102258. 
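For readers unfamiliar with the mocking approach used below, here is a minimal standalone sketch of the pattern this patch relies on: make the profile lookup virtual on the reader, then substitute a gmock override in the unit test. The ProfileSource/Record names are simplified stand-ins for illustration only, not the actual LLVM classes:

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <cstdint>
#include <vector>

// Simplified stand-ins for the reader and record types.
struct Record {
  std::vector<uint64_t> Frames;
};

class ProfileSource {
public:
  virtual ~ProfileSource() = default;
  // Virtual so a unit test can return canned data instead of reading a
  // raw profile from disk.
  virtual Record lookup(uint64_t FuncGUID) const = 0;
};

class MockProfileSource : public ProfileSource {
public:
  MOCK_METHOD(Record, lookup, (uint64_t FuncGUID), (const, override));
};

TEST(ProfileSourceTest, ReturnsCannedRecord) {
  MockProfileSource Source;
  Record Canned{{1, 2, 3}};
  EXPECT_CALL(Source, lookup(42)).WillOnce(::testing::Return(Canned));
  EXPECT_EQ(Source.lookup(42).Frames.size(), 3u);
}

The real test below applies the same idea to IndexedMemProfReader::getMemProfRecord, which this patch marks virtual for exactly this purpose.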
--- .../llvm/ProfileData/InstrProfReader.h | 8 +- .../Transforms/Instrumentation/MemProfiler.h | 19 ++- .../Instrumentation/MemProfiler.cpp | 42 +++-- .../Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/MemProfilerTest.cpp | 158 ++++++++++++++++++ 5 files changed, 199 insertions(+), 29 deletions(-) create mode 100644 llvm/unittests/Transforms/Instrumentation/MemProfilerTest.cpp diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 3b307d08359980..95c891442fd6e9 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -670,10 +670,11 @@ class IndexedMemProfReader { public: IndexedMemProfReader() = default; + virtual ~IndexedMemProfReader() = default; Error deserialize(const unsigned char *Start, uint64_t MemProfOffset); - Expected + virtual Expected getMemProfRecord(const uint64_t FuncNameHash) const; }; @@ -768,11 +769,14 @@ class IndexedInstrProfReader : public InstrProfReader { uint64_t *MismatchedFuncSum = nullptr); /// Return the memprof record for the function identified by - /// llvm::md5(Name). + /// llvm::md5(Name). Marked virtual so that unit tests can mock this function. Expected getMemProfRecord(uint64_t FuncNameHash) { return MemProfReader.getMemProfRecord(FuncNameHash); } + /// Return the underlying memprof reader. + IndexedMemProfReader &getIndexedMemProfReader() { return MemProfReader; } + /// Fill Counts with the profile data for the given function name. Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index f92c6b4775a2a2..c5d03c98f41581 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -13,15 +13,15 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H #include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/VirtualFileSystem.h" namespace llvm { class Function; class Module; - -namespace vfs { -class FileSystem; -} // namespace vfs +class TargetLibraryInfo; /// Public interface to the memory profiler pass for instrumenting code to /// profile memory accesses. 
@@ -52,6 +52,17 @@ class MemProfUsePass : public PassInfoMixin { IntrusiveRefCntPtr FS = nullptr); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + struct AllocMatchInfo { + uint64_t TotalSize = 0; + AllocationType AllocType = AllocationType::None; + bool Matched = false; + }; + + void + readMemprof(Function &F, const IndexedMemProfReader &MemProfReader, + const TargetLibraryInfo &TLI, + std::map &FullStackIdToAllocMatchInfo); + private: std::string MemoryProfileFileName; IntrusiveRefCntPtr FS; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 4a43120c9a9e7f..bd10c037ecf4ad 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -39,7 +39,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/HashBuilder.h" -#include "llvm/Support/VirtualFileSystem.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -55,6 +54,7 @@ namespace llvm { extern cl::opt PGOWarnMissing; extern cl::opt NoPGOWarnMismatch; extern cl::opt NoPGOWarnMismatchComdatWeak; +using AllocMatchInfo = ::llvm::MemProfUsePass::AllocMatchInfo; } // namespace llvm constexpr int LLVM_MEM_PROFILER_VERSION = 1; @@ -148,10 +148,11 @@ static cl::opt ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"), // By default disable matching of allocation profiles onto operator new that // already explicitly pass a hot/cold hint, since we don't currently -// override these hints anyway. -static cl::opt ClMemProfMatchHotColdNew( +// override these hints anyway. Not static so that it can be set in the unit +// test too. +cl::opt ClMemProfMatchHotColdNew( "memprof-match-hot-cold-new", - cl::desc( + cl::desc( "Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false)); @@ -789,17 +790,11 @@ static bool isAllocationWithHotColdVariant(Function *Callee, } } -struct AllocMatchInfo { - uint64_t TotalSize = 0; - AllocationType AllocType = AllocationType::None; - bool Matched = false; -}; - -static void -readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, - const TargetLibraryInfo &TLI, - std::map &FullStackIdToAllocMatchInfo) { - auto &Ctx = M.getContext(); +void MemProfUsePass::readMemprof( + Function &F, const IndexedMemProfReader &MemProfReader, + const TargetLibraryInfo &TLI, + std::map &FullStackIdToAllocMatchInfo) { + auto &Ctx = F.getContext(); // Previously we used getIRPGOFuncName() here. If F is local linkage, // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. 
But // llvm-profdata uses FuncName in dwarf to create GUID which doesn't @@ -810,7 +805,7 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, auto FuncName = F.getName(); auto FuncGUID = Function::getGUID(FuncName); std::optional MemProfRec; - auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec); + auto Err = MemProfReader.getMemProfRecord(FuncGUID).moveInto(MemProfRec); if (Err) { handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { auto Err = IPE.get(); @@ -838,8 +833,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, Twine(" Hash = ") + std::to_string(FuncGUID)) .str(); - Ctx.diagnose( - DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); + Ctx.diagnose(DiagnosticInfoPGOProfile(F.getParent()->getName().data(), + Msg, DS_Warning)); }); return; } @@ -1036,15 +1031,15 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::all(); } - std::unique_ptr MemProfReader = + std::unique_ptr IndexedReader = std::move(ReaderOrErr.get()); - if (!MemProfReader) { + if (!IndexedReader) { Ctx.diagnose(DiagnosticInfoPGOProfile( - MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader"))); + MemoryProfileFileName.data(), StringRef("Cannot get IndexedReader"))); return PreservedAnalyses::all(); } - if (!MemProfReader->hasMemoryProfile()) { + if (!IndexedReader->hasMemoryProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), "Not a memory profile")); return PreservedAnalyses::all(); @@ -1057,12 +1052,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { // it to an allocation in the IR. std::map FullStackIdToAllocMatchInfo; + const auto &MemProfReader = IndexedReader->getIndexedMemProfReader(); for (auto &F : M) { if (F.isDeclaration()) continue; const TargetLibraryInfo &TLI = FAM.getResult(F); - readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo); + readMemprof(F, MemProfReader, TLI, FullStackIdToAllocMatchInfo); } if (ClPrintMemProfMatchInfo) { diff --git a/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt b/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt index 1f249b0049d062..1afe1c339e4335 100644 --- a/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt @@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(InstrumentationTests PGOInstrumentationTest.cpp + MemProfilerTest.cpp ) target_link_libraries(InstrumentationTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfilerTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfilerTest.cpp new file mode 100644 index 00000000000000..844867d676e8dd --- /dev/null +++ b/llvm/unittests/Transforms/Instrumentation/MemProfilerTest.cpp @@ -0,0 +1,158 @@ +//===- MemProfilerTest.cpp - MemProfiler unit tests ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/MemProfiler.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/ProfileData/MemProf.h" +#include "llvm/ProfileData/MemProfData.inc" +#include "llvm/Support/Error.h" +#include "llvm/Support/SourceMgr.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +extern llvm::cl::opt ClMemProfMatchHotColdNew; + +namespace llvm { +namespace memprof { +namespace { + +using ::testing::Return; +using ::testing::SizeIs; + +struct MemProfilerTest : public ::testing::Test { + LLVMContext Context; + std::unique_ptr M; + + MemProfilerTest() { ClMemProfMatchHotColdNew = true; } + + void parseAssembly(const StringRef IR) { + SMDiagnostic Error; + M = parseAssemblyString(IR, Error, Context); + std::string ErrMsg; + raw_string_ostream OS(ErrMsg); + Error.print("", OS); + + // A failure here means that the test itself is buggy. + if (!M) + report_fatal_error(OS.str().c_str()); + } +}; + +// A mock memprof reader we can inject into the function we are testing. +class MockMemProfReader : public IndexedMemProfReader { +public: + MOCK_METHOD(Expected, getMemProfRecord, + (const uint64_t FuncNameHash), (const, override)); + + // A helper function to create mock records from frames. + static MemProfRecord makeRecord(ArrayRef> AllocFrames) { + MemProfRecord Record; + MemInfoBlock Info; + // Mimic values which will be below the cold threshold. 
+ Info.AllocCount = 1, Info.TotalSize = 550; + Info.TotalLifetime = 1000 * 1000, Info.TotalLifetimeAccessDensity = 1; + for (const auto &Callstack : AllocFrames) { + AllocationInfo AI; + AI.Info = PortableMemInfoBlock(Info, getHotColdSchema()); + AI.CallStack = std::vector(Callstack.begin(), Callstack.end()); + Record.AllocSites.push_back(AI); + } + return Record; + } +}; + +TEST_F(MemProfilerTest, AnnotatesCall) { + parseAssembly(R"IR( + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define void @_Z3foov() !dbg !10 { + entry: + %c1 = call {ptr, i64} @__size_returning_new(i64 32), !dbg !13 + %c2 = call {ptr, i64} @__size_returning_new_aligned(i64 32, i64 8), !dbg !14 + %c3 = call {ptr, i64} @__size_returning_new_hot_cold(i64 32, i8 254), !dbg !15 + %c4 = call {ptr, i64} @__size_returning_new_aligned_hot_cold(i64 32, i64 8, i8 254), !dbg !16 + ret void + } + + declare {ptr, i64} @__size_returning_new(i64) + declare {ptr, i64} @__size_returning_new_aligned(i64, i64) + declare {ptr, i64} @__size_returning_new_hot_cold(i64, i8) + declare {ptr, i64} @__size_returning_new_aligned_hot_cold(i64, i64, i8) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!2, !3} + + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) + !1 = !DIFile(filename: "mock_file.cc", directory: "mock_dir") + !2 = !{i32 7, !"Dwarf Version", i32 5} + !3 = !{i32 2, !"Debug Info Version", i32 3} + !10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, unit: !0, retainedNodes: !12) + !11 = !DISubroutineType(types: !12) + !12 = !{} + !13 = !DILocation(line: 5, column: 10, scope: !10) + !14 = !DILocation(line: 6, column: 10, scope: !10) + !15 = !DILocation(line: 7, column: 10, scope: !10) + !16 = !DILocation(line: 8, column: 10, scope: !10) + )IR"); + + auto *F = M->getFunction("_Z3foov"); + ASSERT_NE(F, nullptr); + + TargetLibraryInfoWrapperPass WrapperPass; + auto &TLI = WrapperPass.getTLI(*F); + + auto Guid = Function::getGUID("_Z3foov"); + // All the allocation sites are in foo(). + MemProfRecord MockRecord = + MockMemProfReader::makeRecord({{Frame(Guid, 1, 10, false)}, + {Frame(Guid, 2, 10, false)}, + {Frame(Guid, 3, 10, false)}, + {Frame(Guid, 4, 10, false)}}); + // Set up mocks for the reader. + MockMemProfReader Reader; + EXPECT_CALL(Reader, getMemProfRecord(Guid)).WillOnce(Return(MockRecord)); + + MemProfUsePass Pass("/unused/profile/path"); + std::map Unused; + Pass.readMemprof(*F, Reader, TLI, Unused); + + // Since we only have a single type of behaviour for each allocation site, we + // only get function attributes. + std::vector CallsiteAttrs; + for (const auto &BB : *F) { + for (const auto &I : BB) { + if (auto *CI = dyn_cast(&I)) { + if (!CI->getCalledFunction()->getName().starts_with( + "__size_returning_new")) + continue; + Attribute Attr = CI->getFnAttr("memprof"); + // The attribute will be invalid if it didn't find one named memprof. + ASSERT_TRUE(Attr.isValid()); + CallsiteAttrs.push_back(Attr); + } + } + } + + // We match all the variants including ones with the hint since we set + // ClMemProfMatchHotColdNew to true. 
+ EXPECT_THAT(CallsiteAttrs, SizeIs(4)); +} + +} // namespace +} // namespace memprof +} // namespace llvm From 710664341d72071729401d2eb86356056a3d7f46 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 26 Aug 2024 18:44:16 +0200 Subject: [PATCH 42/65] [libc++][NFC] Don't explicitly provide propagate_on_container_swap when calling __swap_allocator (#105980) `__swap_allocator` does this automatically when not providing it explicitly, so this is just more code without any benefit. --- libcxx/include/forward_list | 3 +-- libcxx/include/vector | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index b14d2cb6c78036..b8e3d05588f96e 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -620,8 +620,7 @@ inline void __forward_list_base<_Tp, _Alloc>::swap(__forward_list_base& __x) _NOEXCEPT_(!__node_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__node_allocator>) #endif { - std::__swap_allocator( - __alloc(), __x.__alloc(), integral_constant()); + std::__swap_allocator(__alloc(), __x.__alloc()); using std::swap; swap(__before_begin()->__next_, __x.__before_begin()->__next_); } diff --git a/libcxx/include/vector b/libcxx/include/vector index 3aa23d8fc1e243..a858f458f44308 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -1821,8 +1821,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::swap(vector& __x) std::swap(this->__begin_, __x.__begin_); std::swap(this->__end_, __x.__end_); std::swap(this->__end_cap(), __x.__end_cap()); - std::__swap_allocator( - this->__alloc(), __x.__alloc(), integral_constant()); + std::__swap_allocator(this->__alloc(), __x.__alloc()); } template @@ -2820,8 +2819,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector::swap(vector& __x) std::swap(this->__begin_, __x.__begin_); std::swap(this->__size_, __x.__size_); std::swap(this->__cap(), __x.__cap()); - std::__swap_allocator( - this->__alloc(), __x.__alloc(), integral_constant()); + std::__swap_allocator(this->__alloc(), __x.__alloc()); } template From 7af61d5cf464f1d716c82bc77907fa3fe4ebc841 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 26 Aug 2024 09:50:17 -0700 Subject: [PATCH 43/65] [flang][cuda] Add shape to cuf.data_transfer operation (#104631) When doing data transfer with dynamic sized array, we are currently generating a data transfer between two descriptors. If the shape values can be provided, we can keep the data transfer between two references. This patch adds the shape operands to the operation. This will be exploited in lowering in a follow up patch. 
--- .../flang/Optimizer/Dialect/CUF/CUFOps.td | 3 +- flang/lib/Lower/Bridge.cpp | 15 +++++---- flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 5 +++ flang/test/Fir/cuf-invalid.fir | 31 +++++++++++++++++++ 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index e95af629ef32f1..f643674f1d5d6b 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -161,10 +161,11 @@ def cuf_DataTransferOp : cuf_Op<"data_transfer", []> { let arguments = (ins Arg:$src, Arg:$dst, + Optional:$shape, cuf_DataTransferKindAttr:$transfer_kind); let assemblyFormat = [{ - $src `to` $dst attr-dict `:` type(operands) + $src `to` $dst (`,` $shape^ `:` type($shape) )? attr-dict `:` type($src) `,` type($dst) }]; let hasVerifier = 1; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index ccbb481f472d81..24cd6b22b89259 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -4272,18 +4272,19 @@ class FirConverter : public Fortran::lower::AbstractConverter { base = convertOp.getValue(); // Special case if the rhs is a constant. if (matchPattern(base.getDefiningOp(), mlir::m_Constant())) { - builder.create(loc, base, lhsVal, - transferKindAttr); + builder.create( + loc, base, lhsVal, /*shape=*/mlir::Value{}, transferKindAttr); } else { auto associate = hlfir::genAssociateExpr( loc, builder, rhs, rhs.getType(), ".cuf_host_tmp"); builder.create(loc, associate.getBase(), lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); builder.create(loc, associate); } } else { - builder.create(loc, rhsVal, lhsVal, - transferKindAttr); + builder.create( + loc, rhsVal, lhsVal, /*shape=*/mlir::Value{}, transferKindAttr); } return; } @@ -4293,6 +4294,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto transferKindAttr = cuf::DataTransferKindAttr::get( builder.getContext(), cuf::DataTransferKind::DeviceHost); builder.create(loc, rhsVal, lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); return; } @@ -4303,6 +4305,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto transferKindAttr = cuf::DataTransferKindAttr::get( builder.getContext(), cuf::DataTransferKind::DeviceDevice); builder.create(loc, rhsVal, lhsVal, + /*shape=*/mlir::Value{}, transferKindAttr); return; } @@ -4346,8 +4349,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { addSymbol(sym, hlfir::translateToExtendedValue(loc, builder, temp).first, /*forced=*/true); - builder.create(loc, addr, temp, - transferKindAttr); + builder.create( + loc, addr, temp, /*shape=*/mlir::Value{}, transferKindAttr); ++nbDeviceResidentObject; } } diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index f7b36b208a7deb..3b4ad95cafe6b5 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -99,6 +99,11 @@ llvm::LogicalResult cuf::AllocateOp::verify() { llvm::LogicalResult cuf::DataTransferOp::verify() { mlir::Type srcTy = getSrc().getType(); mlir::Type dstTy = getDst().getType(); + if (getShape()) { + if (!fir::isa_ref_type(srcTy) || !fir::isa_ref_type(dstTy)) + return emitOpError() + << "shape can only be specified on data transfer with references"; + } if ((fir::isa_ref_type(srcTy) && fir::isa_ref_type(dstTy)) || (fir::isa_box_type(srcTy) && fir::isa_box_type(dstTy)) || (fir::isa_ref_type(srcTy) && fir::isa_box_type(dstTy)) 
|| diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir index 06e08d14b2435c..e9aeaa281e2a85 100644 --- a/flang/test/Fir/cuf-invalid.fir +++ b/flang/test/Fir/cuf-invalid.fir @@ -94,3 +94,34 @@ func.func @_QPsub1() { cuf.free %0 : !fir.ref {data_attr = #cuf.cuda} return } + +// ----- + +func.func @_QPsub1(%arg0: !fir.ref> {cuf.data_attr = #cuf.cuda, fir.bindc_name = "adev"}, %arg1: !fir.ref> {fir.bindc_name = "ahost"}, %arg2: !fir.ref {fir.bindc_name = "n"}, %arg3: !fir.ref {fir.bindc_name = "m"}) { + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFsub1En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg3 dummy_scope %0 {uniq_name = "_QFsub1Em"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3 = fir.load %1#0 : !fir.ref + %4 = fir.load %2#0 : !fir.ref + %5 = arith.muli %3, %4 : i32 + %6 = fir.convert %5 : (i32) -> i64 + %7 = fir.convert %6 : (i64) -> index + %c0 = arith.constant 0 : index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %9 : (index) -> !fir.shape<1> + %11:2 = hlfir.declare %arg0(%10) dummy_scope %0 {data_attr = #cuf.cuda, uniq_name = "_QFsub1Eadev"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %12 = fir.load %1#0 : !fir.ref + %13 = fir.load %2#0 : !fir.ref + %14 = arith.muli %12, %13 : i32 + %15 = fir.convert %14 : (i32) -> i64 + %16 = fir.convert %15 : (i64) -> index + %c0_0 = arith.constant 0 : index + %17 = arith.cmpi sgt, %16, %c0_0 : index + %18 = arith.select %17, %16, %c0_0 : index + %19 = fir.shape %18 : (index) -> !fir.shape<1> + %20:2 = hlfir.declare %arg1(%19) dummy_scope %0 {uniq_name = "_QFsub1Eahost"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + // expected-error@+1{{'cuf.data_transfer' op shape can only be specified on data transfer with references}} + cuf.data_transfer %20#0 to %11#0, %19 : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer} : !fir.box>, !fir.box> + return +} From bbf2781bc49aee4d7ee8ec40dcf7316db360c454 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Mon, 26 Aug 2024 10:05:30 -0700 Subject: [PATCH 44/65] [lldb] Reformat comment (NFC) --- lldb/include/lldb/Utility/Status.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lldb/include/lldb/Utility/Status.h b/lldb/include/lldb/Utility/Status.h index fa5768141fa45d..a80ebe89e562dd 100644 --- a/lldb/include/lldb/Utility/Status.h +++ b/lldb/include/lldb/Utility/Status.h @@ -181,11 +181,12 @@ class Status { bool Success() const; protected: - /// Member variables - ValueType m_code = 0; ///< Status code as an integer value. - lldb::ErrorType m_type = - lldb::eErrorTypeInvalid; ///< The type of the above error code. - mutable std::string m_string; ///< A string representation of the error code. + /// Status code as an integer value. + ValueType m_code = 0; + /// The type of the above error code. + lldb::ErrorType m_type = lldb::eErrorTypeInvalid; + /// A string representation of the error code. + mutable std::string m_string; private: explicit Status(const llvm::formatv_object_base &payload) { SetErrorToGenericError(); From c073821142d1bda68682f8ff640d143cbb03ae7b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 27 Aug 2024 01:11:46 +0800 Subject: [PATCH 45/65] [RISCV] Reduce VL of vmerge.vvm's true operand (#105786) This extends the peephole added in #104689 to also reduce the VL of a PseudoVMERGE_VVM's true operand. 
We could extend this later to reduce the false operand as well, but this starts with just the true operand since it allows vmerges that are converted to vmv.v.vs (convertVMergeToVMv) to be potentially further folded into their source (foldVMV_V_V). --- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 3 +++ llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll | 3 +-- llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll | 3 +-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 822ab492c710b4..34e5d9224f7150 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -131,6 +131,9 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { case RISCV::VMV_V_V: SrcIdx = 2; break; + case RISCV::VMERGE_VVM: + SrcIdx = 3; // TODO: We can also handle the false operand. + break; } MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc())); diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll index d26fd0ca26c729..3a439cdb996fac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll @@ -159,9 +159,8 @@ define @vmerge_larger_vl_same_passthru( %pa define @vmerge_smaller_vl_different_passthru( %pt1, %pt2, %x, %y, %m) { ; CHECK-LABEL: vmerge_smaller_vl_different_passthru: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, mu +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v10, v11, v0.t -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vmv.v.v v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 39055dc5adfcf7..6700920cebff0a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -1072,9 +1072,8 @@ define @vmerge_larger_vl_same_passthru( %pa define @vmerge_smaller_vl_different_passthru( %pt1, %pt2, %x, %y, %m) { ; CHECK-LABEL: vmerge_smaller_vl_different_passthru: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vadd.vv v8, v10, v11 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vadd.vv v8, v10, v11 ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret From 2ef3dcf1fd4b3a2fc849b113644533f3e6df8b1e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 26 Aug 2024 10:12:00 -0700 Subject: [PATCH 46/65] [mlir] Fix a warning This patch fixes: mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp:906:11: error: enumeration value 'EmptyConvolvedDims' not handled in switch [-Werror,-Wswitch] with a workaround. I've notified the author of the new enum value in https://github.com/llvm/llvm-project/pull/102087. 
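To make the diagnostic concrete, a minimal standalone reproduction of the pattern involved, diagnosed under -Wswitch (an error with -Werror); the enum and strings here are simplified stand-ins, not the MLIR code itself:

// A switch over an enum with no default must name every enumerator, so a
// newly introduced value breaks the build until a case is added.
enum class MatchResult { Success, NonOutputDimNotReduction, EmptyConvolvedDims };

const char *message(MatchResult Res) {
  switch (Res) {
  case MatchResult::Success:
    return "";
  case MatchResult::NonOutputDimNotReduction:
    return "expected all iterators not used to access outputs to be reduction";
  // Without this case: "enumeration value 'EmptyConvolvedDims' not handled in
  // switch [-Wswitch]". The placeholder message mirrors the workaround below.
  case MatchResult::EmptyConvolvedDims:
    return "FIXME";
  }
  return "unknown result";
}

int main() { return message(MatchResult::EmptyConvolvedDims)[0] == 'F' ? 0 : 1; }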
--- mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index a38b20eed3a00c..d5c21fb5d845e9 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -918,6 +918,8 @@ mlir::linalg::detail::getMatchConvolutionMessage(MatchConvolutionResult res) { return "expected all iterators used to access outputs to be parallel"; case MatchConvolutionResult::NonOutputDimNotReduction: return "expected all iterators not used to access outputs to be reduction"; + case MatchConvolutionResult::EmptyConvolvedDims: + return "FIXME"; case MatchConvolutionResult::Success: return ""; } From ec4d5a6658782b4a88c634d5b332c56b754c5949 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Mon, 26 Aug 2024 19:20:18 +0200 Subject: [PATCH 47/65] [LLD][COFF] Preserve original symbol name when resolving weak aliases. (#105897) Instead of replacing it with target's name. --- lld/COFF/SymbolTable.cpp | 14 +------------- lld/COFF/Symbols.cpp | 23 +++++++++++++++++++++++ lld/COFF/Symbols.h | 3 +++ lld/test/COFF/symtab.test | 18 ++++++++++++++++++ 4 files changed, 45 insertions(+), 13 deletions(-) diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 1dfff0a90f4aee..a5f155bc05bc9e 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -494,20 +494,8 @@ void SymbolTable::resolveRemainingUndefines() { StringRef name = undef->getName(); // A weak alias may have been resolved, so check for that. - if (Defined *d = undef->getWeakAlias()) { - // We want to replace Sym with D. However, we can't just blindly - // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an - // internal symbol, and internal symbols are stored as "unparented" - // Symbols. For that reason we need to check which type of symbol we - // are dealing with and copy the correct number of bytes. - if (isa(d)) - memcpy(sym, d, sizeof(DefinedRegular)); - else if (isa(d)) - memcpy(sym, d, sizeof(DefinedAbsolute)); - else - memcpy(sym, d, sizeof(SymbolUnion)); + if (undef->resolveWeakAlias()) continue; - } // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index ff8ad1e619116f..b098abb80d6f1e 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -136,6 +136,29 @@ Defined *Undefined::getWeakAlias() { return nullptr; } +bool Undefined::resolveWeakAlias() { + Defined *d = getWeakAlias(); + if (!d) + return false; + + // We want to replace Sym with D. However, we can't just blindly + // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an + // internal symbol, and internal symbols are stored as "unparented" + // Symbols. For that reason we need to check which type of symbol we + // are dealing with and copy the correct number of bytes. 
+ StringRef name = getName(); + if (isa(d)) + memcpy(this, d, sizeof(DefinedRegular)); + else if (isa(d)) + memcpy(this, d, sizeof(DefinedAbsolute)); + else + memcpy(this, d, sizeof(SymbolUnion)); + + nameData = name.data(); + nameSize = name.size(); + return true; +} + MemoryBufferRef LazyArchive::getMemberBuffer() { Archive::Child c = CHECK(sym.getMember(), "could not get the member for symbol " + diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 56b137d56873aa..c427a062dc82b2 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -341,6 +341,9 @@ class Undefined : public Symbol { // symbol by searching the chain of fallback symbols. Returns the symbol if // successful, otherwise returns null. Defined *getWeakAlias(); + + // If this symbol is external weak, replace this object with aliased symbol. + bool resolveWeakAlias(); }; // Windows-specific classes. diff --git a/lld/test/COFF/symtab.test b/lld/test/COFF/symtab.test index 45e8ed39737a46..6ef2b4d47503c7 100644 --- a/lld/test/COFF/symtab.test +++ b/lld/test/COFF/symtab.test @@ -86,6 +86,15 @@ # CHECK-NEXT: StorageClass: External (0x2) # CHECK-NEXT: AuxSymbolCount: 0 # CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: weak_main +# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Section: .text (1) +# CHECK-NEXT: BaseType: Null (0x0) +# CHECK-NEXT: ComplexType: Null (0x0) +# CHECK-NEXT: StorageClass: External (0x2) +# CHECK-NEXT: AuxSymbolCount: 0 +# CHECK-NEXT: } # CHECK-NEXT: ] # NO: Symbols [ @@ -237,4 +246,13 @@ symbols: SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_NULL StorageClass: IMAGE_SYM_CLASS_LABEL + - Name: weak_main + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_WEAK_EXTERNAL + WeakExternal: + TagIndex: 10 + Characteristics: IMAGE_WEAK_EXTERN_SEARCH_ALIAS ... From 4bab0387e9be3683a36b5ab0412b25fbab632aa5 Mon Sep 17 00:00:00 2001 From: Chris B Date: Mon, 26 Aug 2024 12:31:45 -0500 Subject: [PATCH 48/65] [HLSL] Add __builtin_hlsl_is_scalarized_layout_compatible (#102227) HLSL tends to rely pretty aggressively on scalarization occuring in the complier, which allows for some relaxed language behaviors when types are fully sclarized to equivalent scalar representations. This change adds a new queryable trait builtin for scalarized layout compatability. Resolves #100614 --------- Co-authored-by: Aaron Ballman --- clang/include/clang/Basic/TokenKinds.def | 3 + clang/include/clang/Sema/SemaHLSL.h | 3 + clang/lib/Sema/SemaExprCXX.cpp | 18 +++ clang/lib/Sema/SemaHLSL.cpp | 82 +++++++++++ .../Traits/ScalarizedLayoutCompatible.hlsl | 132 ++++++++++++++++++ .../ScalarizedLayoutCompatibleErrors.hlsl | 64 +++++++++ 6 files changed, 302 insertions(+) create mode 100644 clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl create mode 100644 clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index d683106bb0e298..212c1f6ff3a124 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -660,6 +660,9 @@ KEYWORD(out , KEYHLSL) #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) KEYWORD(Name, KEYHLSL) #include "clang/Basic/HLSLIntangibleTypes.def" +// HLSL Type traits. 
+TYPE_TRAIT_2(__builtin_hlsl_is_scalarized_layout_compatible, IsScalarizedLayoutCompatible, KEYHLSL) + // OpenMP Type Traits UNARY_EXPR_OR_TYPE_TRAIT(__builtin_omp_required_simd_align, OpenMPRequiredSimdAlign, KEYALL) diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 3aae3383c215b5..5277fb57a23343 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -61,6 +61,9 @@ class SemaHLSL : public SemaBase { void handleParamModifierAttr(Decl *D, const ParsedAttr &AL); bool CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + + // HLSL Type trait implementations + bool IsScalarizedLayoutCompatible(QualType T1, QualType T2) const; }; } // namespace clang diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 746c67ff1e979f..d8719ab26cc83f 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -39,6 +39,7 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaCUDA.h" +#include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaLambda.h" #include "clang/Sema/SemaObjC.h" @@ -6248,6 +6249,23 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, const TypeSourceI TSTToBeDeduced->getTemplateName().getAsTemplateDecl(), RhsT, Info) == TemplateDeductionResult::Success; } + case BTT_IsScalarizedLayoutCompatible: { + if (!LhsT->isVoidType() && !LhsT->isIncompleteArrayType() && + Self.RequireCompleteType(Lhs->getTypeLoc().getBeginLoc(), LhsT, + diag::err_incomplete_type)) + return true; + if (!RhsT->isVoidType() && !RhsT->isIncompleteArrayType() && + Self.RequireCompleteType(Rhs->getTypeLoc().getBeginLoc(), RhsT, + diag::err_incomplete_type)) + return true; + + DiagnoseVLAInCXXTypeTrait( + Self, Lhs, tok::kw___builtin_hlsl_is_scalarized_layout_compatible); + DiagnoseVLAInCXXTypeTrait( + Self, Rhs, tok::kw___builtin_hlsl_is_scalarized_layout_compatible); + + return Self.HLSL().IsScalarizedLayoutCompatible(LhsT, RhsT); + } default: llvm_unreachable("not a BTT"); } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 17cb47f80590d9..714e8f5cfa9926 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1524,3 +1524,85 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { } return false; } + +static void BuildFlattenedTypeList(QualType BaseTy, + llvm::SmallVectorImpl &List) { + llvm::SmallVector WorkList; + WorkList.push_back(BaseTy); + while (!WorkList.empty()) { + QualType T = WorkList.pop_back_val(); + T = T.getCanonicalType().getUnqualifiedType(); + assert(!isa(T) && "Matrix types not yet supported in HLSL"); + if (const auto *AT = dyn_cast(T)) { + llvm::SmallVector ElementFields; + // Generally I've avoided recursion in this algorithm, but arrays of + // structs could be time-consuming to flatten and churn through on the + // work list. Hopefully nesting arrays of structs containing arrays + // of structs too many levels deep is unlikely. + BuildFlattenedTypeList(AT->getElementType(), ElementFields); + // Repeat the element's field list n times. + for (uint64_t Ct = 0; Ct < AT->getZExtSize(); ++Ct) + List.insert(List.end(), ElementFields.begin(), ElementFields.end()); + continue; + } + // Vectors can only have element types that are builtin types, so this can + // add directly to the list instead of to the WorkList. 
+ if (const auto *VT = dyn_cast(T)) { + List.insert(List.end(), VT->getNumElements(), VT->getElementType()); + continue; + } + if (const auto *RT = dyn_cast(T)) { + const RecordDecl *RD = RT->getDecl(); + if (RD->isUnion()) { + List.push_back(T); + continue; + } + const CXXRecordDecl *CXXD = dyn_cast(RD); + + llvm::SmallVector FieldTypes; + if (CXXD && CXXD->isStandardLayout()) + RD = CXXD->getStandardLayoutBaseWithFields(); + + for (const auto *FD : RD->fields()) + FieldTypes.push_back(FD->getType()); + // Reverse the newly added sub-range. + std::reverse(FieldTypes.begin(), FieldTypes.end()); + WorkList.insert(WorkList.end(), FieldTypes.begin(), FieldTypes.end()); + + // If this wasn't a standard layout type we may also have some base + // classes to deal with. + if (CXXD && !CXXD->isStandardLayout()) { + FieldTypes.clear(); + for (const auto &Base : CXXD->bases()) + FieldTypes.push_back(Base.getType()); + std::reverse(FieldTypes.begin(), FieldTypes.end()); + WorkList.insert(WorkList.end(), FieldTypes.begin(), FieldTypes.end()); + } + continue; + } + List.push_back(T); + } +} + +bool SemaHLSL::IsScalarizedLayoutCompatible(QualType T1, QualType T2) const { + if (T1.isNull() || T2.isNull()) + return false; + + T1 = T1.getCanonicalType().getUnqualifiedType(); + T2 = T2.getCanonicalType().getUnqualifiedType(); + + // If both types are the same canonical type, they're obviously compatible. + if (SemaRef.getASTContext().hasSameType(T1, T2)) + return true; + + llvm::SmallVector T1Types; + BuildFlattenedTypeList(T1, T1Types); + llvm::SmallVector T2Types; + BuildFlattenedTypeList(T2, T2Types); + + // Check the flattened type list + return llvm::equal(T1Types, T2Types, + [this](QualType LHS, QualType RHS) -> bool { + return SemaRef.IsLayoutCompatible(LHS, RHS); + }); +} diff --git a/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl new file mode 100644 index 00000000000000..db46a8e1414953 --- /dev/null +++ b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatible.hlsl @@ -0,0 +1,132 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -fnative-half-type -verify %s +// expected-no-diagnostics + +// Case 1: How many ways can I come up with to represent three float values? +struct ThreeFloats1 { + float X, Y, Z; +}; + +struct ThreeFloats2 { + float X[3]; +}; + +struct ThreeFloats3 { + float3 V; +}; + +struct ThreeFloats4 { + float2 V; + float F; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, float[3]), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats1), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats2), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats3), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(float3, ThreeFloats4), ""); + +// Case 2: structs and base classes and arrays, oh my! 
+struct Dog { + int Leg[4]; + bool Tail; + float Fur; +}; + +struct Shiba { + int4 StubbyLegs; + bool CurlyTail; + struct Coating { + float Fur; + } F; +}; + +struct FourLegged { + int FR, FL, BR, BL; +}; + +struct Doggo : FourLegged { + bool WaggyBit; + float Fuzz; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Shiba), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Doggo), ""); + +// Case 3: Arrays of structs inside structs + +struct Cat { + struct Leg { + int L; + } Legs[4]; + struct Other { + bool Tail; + float Furs; + } Bits; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Dog, Cat), ""); + +// case 4: Arrays of structs inside arrays of structs. +struct Pets { + Dog Puppers[6]; + Cat Kitties[4]; +}; + +struct Animals { + Dog Puppers[2]; + Cat Kitties[8]; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Pets, Animals), ""); + +// Case 5: Turtles all the way down... + +typedef int Turtle; + +enum Ninja : Turtle { + Leonardo, + Donatello, + Michelangelo, + Raphael, +}; + +enum NotNinja : Turtle { + Fred, + Mikey, +}; + +enum Mammals : uint { + Dog, + Cat, +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Ninja, NotNinja), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Ninja, Mammals), ""); + +// Case 6: Some basic types. +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(int, int32_t), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(uint, uint32_t), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(int, uint), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(int, float), ""); + +// Even though half and float may be the same size we don't want them to be +// layout compatible since they are different types. +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(half, float), ""); + +// Case 6: Empty classes... because they're fun. + +struct NotEmpty { int X; }; +struct Empty {}; +struct AlsoEmpty {}; + +struct DerivedEmpty : Empty {}; + +struct DerivedNotEmpty : Empty { int X; }; +struct DerivedEmptyNotEmptyBase : NotEmpty {}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Empty, AlsoEmpty), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Empty, DerivedEmpty), ""); + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(NotEmpty, DerivedNotEmpty), ""); +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(NotEmpty, DerivedEmptyNotEmptyBase), ""); diff --git a/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl new file mode 100644 index 00000000000000..4c96795da7fd0c --- /dev/null +++ b/clang/test/SemaHLSL/Types/Traits/ScalarizedLayoutCompatibleErrors.hlsl @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -verify %s + +// Some things that don't work! + +// Case 1: Both types must be complete! +struct Defined { + int X; +}; + + +struct Undefined; // expected-note {{forward declaration of 'Undefined'}} + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Undefined, Defined), ""); // expected-error{{incomplete type 'Undefined' where a complete type is required}} + +// Case 2: No variable length arrays! 
+ +void fn(int X) { + // expected-error@#vla {{variable length arrays are not supported for the current target}} + // expected-error@#vla {{variable length arrays are not supported in '__builtin_hlsl_is_scalarized_layout_compatible'}} + // expected-error@#vla {{static assertion failed due to requirement '__builtin_hlsl_is_scalarized_layout_compatible(int[4], int[X])'}} + // expected-warning@#vla {{variable length arrays in C++ are a Clang extension}} + _Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(int[4], int[X]), ""); // #vla +} + +// Case 3: Make this always fail for unions. +// HLSL doesn't really support unions, and the places where scalarized layouts +// are valid is probably going to be really confusing for unions, so we should +// just make sure unions are never scalarized compatible with anything other +// than themselves. + +union Wah { + int OhNo; + float NotAgain; +}; + +struct OneInt { + int I; +}; + +struct OneFloat { + float F; +}; + +struct HasUnion { + int I; + Wah W; +}; + +struct HasUnionSame { + int I; + Wah W; +}; + +struct HasUnionDifferent { + Wah W; + int I; +}; + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(Wah, Wah), "Identical types are always compatible"); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Wah, OneInt), "Unions are not compatible with anything else"); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(Wah, OneFloat), "Unions are not compatible with anything else"); + +_Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(HasUnion, HasUnionSame), ""); +_Static_assert(!__builtin_hlsl_is_scalarized_layout_compatible(HasUnion, HasUnionDifferent), ""); From ff5816ad29eba3762e1c5c576c1adf586c35dd91 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com> Date: Mon, 26 Aug 2024 13:40:11 -0400 Subject: [PATCH 49/65] [DirectX] Add `all` lowering (#105787) - DXILIntrinsicExpansion.cpp: Modify `any` codegen expansion to work for `all` - DirectX\all.ll: Add test case completes #88946 --- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 51 ++++++------ llvm/test/CodeGen/DirectX/all.ll | 83 +++++++++++++++++++ 2 files changed, 110 insertions(+), 24 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/all.ll diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index e49169cff8aa86..2daa4f825c3b25 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -38,6 +38,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::log: case Intrinsic::log10: case Intrinsic::pow: + case Intrinsic::dx_all: case Intrinsic::dx_any: case Intrinsic::dx_clamp: case Intrinsic::dx_uclamp: @@ -54,8 +55,7 @@ static bool isIntrinsicExpansion(Function &F) { static Value *expandAbs(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant *Zero = Ty->isVectorTy() @@ -148,8 +148,7 @@ static Value *expandIntegerDotIntrinsic(CallInst *Orig, static Value *expandExpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant *Log2eConst = @@ -166,13 +165,21 @@ static Value *expandExpIntrinsic(CallInst *Orig) { return 
Exp2Call; } -static Value *expandAnyIntrinsic(CallInst *Orig) { +static Value *expandAnyOrAllIntrinsic(CallInst *Orig, + Intrinsic::ID intrinsicId) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); + auto ApplyOp = [&Builder](Intrinsic::ID IntrinsicId, Value *Result, + Value *Elt) { + if (IntrinsicId == Intrinsic::dx_any) + return Builder.CreateOr(Result, Elt); + assert(IntrinsicId == Intrinsic::dx_all); + return Builder.CreateAnd(Result, Elt); + }; + Value *Result = nullptr; if (!Ty->isVectorTy()) { Result = EltTy->isFloatingPointTy() @@ -193,7 +200,7 @@ static Value *expandAnyIntrinsic(CallInst *Orig) { Result = Builder.CreateExtractElement(Cond, (uint64_t)0); for (unsigned I = 1; I < XVec->getNumElements(); I++) { Value *Elt = Builder.CreateExtractElement(Cond, I); - Result = Builder.CreateOr(Result, Elt); + Result = ApplyOp(intrinsicId, Result, Elt); } } return Result; @@ -201,8 +208,7 @@ static Value *expandAnyIntrinsic(CallInst *Orig) { static Value *expandLengthIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); @@ -230,8 +236,7 @@ static Value *expandLerpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); Value *S = Orig->getOperand(2); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *V = Builder.CreateFSub(Y, X); V = Builder.CreateFMul(S, V); return Builder.CreateFAdd(X, V, "dx.lerp"); @@ -240,8 +245,7 @@ static Value *expandLerpIntrinsic(CallInst *Orig) { static Value *expandLogIntrinsic(CallInst *Orig, float LogConstVal = numbers::ln2f) { Value *X = Orig->getOperand(0); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); Type *Ty = X->getType(); Type *EltTy = Ty->getScalarType(); Constant *Ln2Const = @@ -266,8 +270,7 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Type *Ty = Orig->getType(); Type *EltTy = Ty->getScalarType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *XVec = dyn_cast(Ty); if (!XVec) { @@ -305,8 +308,7 @@ static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); Type *Ty = X->getType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *Log2Call = Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2"); @@ -350,8 +352,7 @@ static Value *expandClampIntrinsic(CallInst *Orig, Value *Min = Orig->getOperand(1); Value *Max = Orig->getOperand(2); Type *Ty = X->getType(); - IRBuilder<> Builder(Orig->getParent()); - Builder.SetInsertPoint(Orig); + IRBuilder<> Builder(Orig); auto *MaxCall = Builder.CreateIntrinsic( Ty, getMaxForClamp(Ty, ClampIntrinsic), {X, Min}, nullptr, "dx.max"); return Builder.CreateIntrinsic(Ty, getMinForClamp(Ty, ClampIntrinsic), @@ -360,7 +361,8 @@ static Value *expandClampIntrinsic(CallInst *Orig, static bool expandIntrinsic(Function &F, CallInst *Orig) { Value *Result = nullptr; - switch (F.getIntrinsicID()) { + Intrinsic::ID IntrinsicId = F.getIntrinsicID(); + switch (IntrinsicId) { case Intrinsic::abs: Result = expandAbs(Orig); break; @@ -376,12 
+378,13 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::pow: Result = expandPowIntrinsic(Orig); break; + case Intrinsic::dx_all: case Intrinsic::dx_any: - Result = expandAnyIntrinsic(Orig); + Result = expandAnyOrAllIntrinsic(Orig, IntrinsicId); break; case Intrinsic::dx_uclamp: case Intrinsic::dx_clamp: - Result = expandClampIntrinsic(Orig, F.getIntrinsicID()); + Result = expandClampIntrinsic(Orig, IntrinsicId); break; case Intrinsic::dx_lerp: Result = expandLerpIntrinsic(Orig); @@ -397,7 +400,7 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { break; case Intrinsic::dx_sdot: case Intrinsic::dx_udot: - Result = expandIntegerDotIntrinsic(Orig, F.getIntrinsicID()); + Result = expandIntegerDotIntrinsic(Orig, IntrinsicId); break; } diff --git a/llvm/test/CodeGen/DirectX/all.ll b/llvm/test/CodeGen/DirectX/all.ll new file mode 100644 index 00000000000000..1c0b6486dc9358 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/all.ll @@ -0,0 +1,83 @@ +; RUN: opt -S -passes=dxil-intrinsic-expansion,dxil-op-lower -mtriple=dxil-pc-shadermodel6.0-library < %s | FileCheck %s + +; Make sure dxil operation function calls for all are generated for float and half. + +; CHECK-LABEL: all_bool +; CHECK: icmp ne i1 %{{.*}}, false +define noundef i1 @all_bool(i1 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i1(i1 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int64_t +; CHECK: icmp ne i64 %{{.*}}, 0 +define noundef i1 @all_int64_t(i64 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i64(i64 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int +; CHECK: icmp ne i32 %{{.*}}, 0 +define noundef i1 @all_int(i32 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i32(i32 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_int16_t +; CHECK: icmp ne i16 %{{.*}}, 0 +define noundef i1 @all_int16_t(i16 noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.i16(i16 %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_double +; CHECK: fcmp une double %{{.*}}, 0.000000e+00 +define noundef i1 @all_double(double noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f64(double %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_float +; CHECK: fcmp une float %{{.*}}, 0.000000e+00 +define noundef i1 @all_float(float noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f32(float %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_half +; CHECK: fcmp une half %{{.*}}, 0xH0000 +define noundef i1 @all_half(half noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.f16(half %p0) + ret i1 %dx.all +} + +; CHECK-LABEL: all_bool4 +; CHECK: icmp ne <4 x i1> %{{.*}}, zeroinitialize +; CHECK: extractelement <4 x i1> %{{.*}}, i64 0 +; CHECK: extractelement <4 x i1> %{{.*}}, i64 1 +; CHECK: and i1 %{{.*}}, %{{.*}} +; CHECK: extractelement <4 x i1> %{{.*}}, i64 2 +; CHECK: and i1 %{{.*}}, %{{.*}} +; CHECK: extractelement <4 x i1> %{{.*}}, i64 3 +; CHECK: and i1 %{{.*}}, %{{.*}} +define noundef i1 @all_bool4(<4 x i1> noundef %p0) { +entry: + %dx.all = call i1 @llvm.dx.all.v4i1(<4 x i1> %p0) + ret i1 %dx.all +} + +declare i1 @llvm.dx.all.v4i1(<4 x i1>) +declare i1 @llvm.dx.all.i1(i1) +declare i1 @llvm.dx.all.i16(i16) +declare i1 @llvm.dx.all.i32(i32) +declare i1 @llvm.dx.all.i64(i64) +declare i1 @llvm.dx.all.f16(half) +declare i1 @llvm.dx.all.f32(float) +declare i1 @llvm.dx.all.f64(double) From 643bf6cb01bc3faa54c5510c904ea3bdcb4bf42f Mon Sep 17 00:00:00 2001 From: Joshua Baehring <98630690+JoshuaMBa@users.noreply.github.com> Date: Mon, 26 Aug 2024 13:44:39 -0400 Subject: [PATCH 50/65] [scudo] Add partial 
chunk heuristic to retrieval algorithm. (#105009) Previously the secondary cache retrieval algorithm would not allow retrievals of memory chunks where the number of unused bytes would be greater than than `MaxUnreleasedCachePages * PageSize` bytes. This meant that even if a memory chunk satisfied the requirements of the optimal fit algorithm, it may not be returned. This remains true if memory tagging is enabled. However, if memory tagging is disabled, a new heuristic has been put in place. Specifically, If a memory chunk is a non-optimal fit, the cache retrieval algorithm will attempt to release the excess memory to force a cache hit while keeping RSS down. In the event that a memory chunk is a non-optimal fit, the retrieval algorithm will release excess memory as long as the amount of memory to be released is less than or equal to 4 Pages. If the amount of memory to be released exceeds 4 Pages, the retrieval algorithm will not consider that cached memory chunk valid for retrieval. This change also addresses an alignment issue in a test case submitted in #104807. --- compiler-rt/lib/scudo/standalone/secondary.h | 130 ++++++++++++++---- .../scudo/standalone/tests/secondary_test.cpp | 32 ++++- 2 files changed, 128 insertions(+), 34 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index 27f8697db7838f..985e2392641ae2 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -72,6 +72,15 @@ namespace { struct CachedBlock { static constexpr u16 CacheIndexMax = UINT16_MAX; static constexpr u16 InvalidEntry = CacheIndexMax; + // * MaxReleasedCachePages default is currently 4 + // - We arrived at this value after noticing that mapping + // in larger memory regions performs better than releasing + // memory and forcing a cache hit. According to the data, + // it suggests that beyond 4 pages, the release execution time is + // longer than the map execution time. In this way, the default + // is dependent on the platform. 
+ // TODO: set MaxReleasedCachePages back to 4U + static constexpr uptr MaxReleasedCachePages = 0U; uptr CommitBase = 0; uptr CommitSize = 0; @@ -90,8 +99,9 @@ struct CachedBlock { template class MapAllocatorNoCache { public: void init(UNUSED s32 ReleaseToOsInterval) {} - CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment, - UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) { + CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size, + UNUSED uptr Alignment, UNUSED uptr HeadersSize, + UNUSED uptr &EntryHeaderPos) { return {}; } void store(UNUSED Options Options, UNUSED uptr CommitBase, @@ -121,7 +131,7 @@ template class MapAllocatorNoCache { } }; -static const uptr MaxUnusedCachePages = 4U; +static const uptr MaxUnreleasedCachePages = 4U; template bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize, @@ -151,9 +161,11 @@ bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize, } } - const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize; - if (useMemoryTagging(Options) && CommitSize > MaxUnusedCacheBytes) { - const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes); + const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize; + if (useMemoryTagging(Options) && + CommitSize > MaxUnreleasedCacheBytes) { + const uptr UntaggedPos = + Max(AllocPos, CommitBase + MaxUnreleasedCacheBytes); return MemMap.remap(CommitBase, UntaggedPos - CommitBase, "scudo:secondary", MAP_MEMTAG | Flags) && MemMap.remap(UntaggedPos, CommitBase + CommitSize - UntaggedPos, @@ -334,13 +346,13 @@ class MapAllocatorCache { } } - CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize, - uptr &EntryHeaderPos) EXCLUDES(Mutex) { + CachedBlock retrieve(uptr MaxAllowedFragmentedPages, uptr Size, + uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos) + EXCLUDES(Mutex) { const uptr PageSize = getPageSizeCached(); // 10% of the requested size proved to be the optimal choice for // retrieving cached blocks after testing several options. constexpr u32 FragmentedBytesDivisor = 10; - bool Found = false; CachedBlock Entry; EntryHeaderPos = 0; { @@ -348,47 +360,100 @@ class MapAllocatorCache { CallsToRetrieve++; if (EntriesCount == 0) return {}; - u32 OptimalFitIndex = 0; + u16 RetrievedIndex = CachedBlock::InvalidEntry; uptr MinDiff = UINTPTR_MAX; - for (u32 I = LRUHead; I != CachedBlock::InvalidEntry; + + // Since allocation sizes don't always match cached memory chunk sizes + // we allow some memory to be unused (called fragmented bytes). The + // amount of unused bytes is exactly EntryHeaderPos - CommitBase. + // + // CommitBase CommitBase + CommitSize + // V V + // +---+------------+-----------------+---+ + // | | | | | + // +---+------------+-----------------+---+ + // ^ ^ ^ + // Guard EntryHeaderPos Guard-page-end + // page-begin + // + // [EntryHeaderPos, CommitBase + CommitSize) contains the user data as + // well as the header metadata. If EntryHeaderPos - CommitBase exceeds + // MaxAllowedFragmentedPages * PageSize, the cached memory chunk is + // not considered valid for retrieval. 
+ for (u16 I = LRUHead; I != CachedBlock::InvalidEntry; I = Entries[I].Next) { const uptr CommitBase = Entries[I].CommitBase; const uptr CommitSize = Entries[I].CommitSize; const uptr AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment); const uptr HeaderPos = AllocPos - HeadersSize; + const uptr MaxAllowedFragmentedBytes = + MaxAllowedFragmentedPages * PageSize; if (HeaderPos > CommitBase + CommitSize) continue; + // TODO: Remove AllocPos > CommitBase + MaxAllowedFragmentedBytes + // and replace with Diff > MaxAllowedFragmentedBytes if (HeaderPos < CommitBase || - AllocPos > CommitBase + PageSize * MaxUnusedCachePages) { + AllocPos > CommitBase + MaxAllowedFragmentedBytes) { continue; } - Found = true; - const uptr Diff = HeaderPos - CommitBase; - // immediately use a cached block if it's size is close enough to the - // requested size. - const uptr MaxAllowedFragmentedBytes = - (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor; - if (Diff <= MaxAllowedFragmentedBytes) { - OptimalFitIndex = I; - EntryHeaderPos = HeaderPos; - break; - } - // keep track of the smallest cached block + + const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase; + + // Keep track of the smallest cached block // that is greater than (AllocSize + HeaderSize) - if (Diff > MinDiff) + if (Diff >= MinDiff) continue; - OptimalFitIndex = I; + MinDiff = Diff; + RetrievedIndex = I; EntryHeaderPos = HeaderPos; + + // Immediately use a cached block if its size is close enough to the + // requested size + const uptr OptimalFitThesholdBytes = + (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor; + if (Diff <= OptimalFitThesholdBytes) + break; } - if (Found) { - Entry = Entries[OptimalFitIndex]; - remove(OptimalFitIndex); + if (RetrievedIndex != CachedBlock::InvalidEntry) { + Entry = Entries[RetrievedIndex]; + remove(RetrievedIndex); SuccessfulRetrieves++; } } + // The difference between the retrieved memory chunk and the request + // size is at most MaxAllowedFragmentedPages + // + // / MaxAllowedFragmentedPages * PageSize \ + // +--------------------------+-------------+ + // | | | + // +--------------------------+-------------+ + // \ Bytes to be released / ^ + // | + // (may or may not be committed) + // + // The maximum number of bytes released to the OS is capped by + // MaxReleasedCachePages + // + // TODO : Consider making MaxReleasedCachePages configurable since + // the release to OS API can vary across systems. 
+ if (Entry.Time != 0) { + const uptr FragmentedBytes = + roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase; + const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize; + if (FragmentedBytes > MaxUnreleasedCacheBytes) { + const uptr MaxReleasedCacheBytes = + CachedBlock::MaxReleasedCachePages * PageSize; + uptr BytesToRelease = + roundUp(Min(MaxReleasedCacheBytes, + FragmentedBytes - MaxUnreleasedCacheBytes), + PageSize); + Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease); + } + } + return Entry; } @@ -659,8 +724,13 @@ MapAllocator::tryAllocateFromCache(const Options &Options, uptr Size, FillContentsMode FillContents) { CachedBlock Entry; uptr EntryHeaderPos; + uptr MaxAllowedFragmentedPages = MaxUnreleasedCachePages; + + if (UNLIKELY(useMemoryTagging(Options))) + MaxAllowedFragmentedPages += CachedBlock::MaxReleasedCachePages; - Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos); + Entry = Cache.retrieve(MaxAllowedFragmentedPages, Size, Alignment, + getHeadersSize(), EntryHeaderPos); if (!Entry.isValid()) return nullptr; diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index e85b6abdb36d22..3638f1c36ddd9b 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -281,8 +281,8 @@ struct MapAllocatorCacheTest : public Test { std::unique_ptr Cache = std::make_unique(); const scudo::uptr PageSize = scudo::getPageSizeCached(); - // The current test allocation size is set to the minimum size - // needed for the scudo allocator to fall back to the secondary allocator + // The current test allocation size is set to the maximum + // cache entry size static constexpr scudo::uptr TestAllocSize = CacheConfig::getDefaultMaxEntrySize(); @@ -327,7 +327,7 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) { for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) { scudo::uptr EntryHeaderPos; scudo::CachedBlock Entry = - Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos); + Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos); EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase()); } @@ -336,6 +336,30 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) { MemMap.unmap(); } +TEST_F(MapAllocatorCacheTest, PartialChunkHeuristicRetrievalTest) { + const scudo::uptr FragmentedPages = + 1 + scudo::CachedBlock::MaxReleasedCachePages; + scudo::uptr EntryHeaderPos; + scudo::CachedBlock Entry; + scudo::MemMapT MemMap = allocate(PageSize + FragmentedPages * PageSize); + Cache->store(Options, MemMap.getBase(), MemMap.getCapacity(), + MemMap.getBase(), MemMap); + + // FragmentedPages > MaxAllowedFragmentedPages so PageSize + // cannot be retrieved from the cache + Entry = Cache->retrieve(/*MaxAllowedFragmentedPages=*/0, PageSize, PageSize, + 0, EntryHeaderPos); + EXPECT_FALSE(Entry.isValid()); + + // FragmentedPages == MaxAllowedFragmentedPages so PageSize + // can be retrieved from the cache + Entry = + Cache->retrieve(FragmentedPages, PageSize, PageSize, 0, EntryHeaderPos); + EXPECT_TRUE(Entry.isValid()); + + MemMap.unmap(); +} + TEST_F(MapAllocatorCacheTest, MemoryLeakTest) { std::vector MemMaps; // Fill the cache above MaxEntriesCount to force an eviction @@ -351,7 +375,7 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) { for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) { scudo::uptr EntryHeaderPos; RetrievedEntries.push_back( - 
Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos)); + Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos)); EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase()); } From 1387ba48a312b6e9b174d850f8c9a1322f44c623 Mon Sep 17 00:00:00 2001 From: Giuseppe Rossini Date: Mon, 26 Aug 2024 18:48:57 +0100 Subject: [PATCH 51/65] [MLIR][AMDGPU] Introduce fp16 packed arithmetic (#105688) This PR is introducing rocdl.cvt.pkrtz in the ROCDL dialect and it is using that instruction when lowering `arith::TruncFOp`. --- .../Conversion/ArithToAMDGPU/ArithToAMDGPU.h | 7 +- mlir/include/mlir/Conversion/Passes.td | 6 + mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 1 + mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 17 ++- .../ArithToAMDGPU/ArithToAMDGPU.cpp | 119 ++++++++++++++++-- .../Conversion/ArithToAMDGPU/CMakeLists.txt | 1 + mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 1 + mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt | 1 + .../ArithToAMDGPU/16-bit-floats.mlir | 51 ++++++++ .../ArithToAMDGPU/8-bit-float-saturation.mlir | 2 +- .../ArithToAMDGPU/8-bit-floats.mlir | 2 +- mlir/test/Target/LLVMIR/rocdl.mlir | 6 + 12 files changed, 203 insertions(+), 11 deletions(-) create mode 100644 mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir diff --git a/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h b/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h index 78c79c915e0607..28fdc234e5ef07 100644 --- a/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h +++ b/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h @@ -9,7 +9,9 @@ #ifndef MLIR_CONVERSION_ARITHTOAMDGPU_ARITHTOAMDGPU_H #define MLIR_CONVERSION_ARITHTOAMDGPU_ARITHTOAMDGPU_H +#include "mlir/Dialect/AMDGPU/Utils/Chipset.h" #include +#include namespace mlir { @@ -26,7 +28,10 @@ namespace arith { /// to the largest value of that type instead of being rewritten to Inf (aka /// NaN). 
void populateArithToAMDGPUConversionPatterns(RewritePatternSet &patterns, - bool saturateFP8TruncF); + bool convertFP8Arithmetic, + bool saturateFP8Truncf, + bool allowPackedF16Rtz, + amdgpu::Chipset chipset); } // namespace arith } // namespace mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 7bde9e490e4f4e..383e7dca0429c5 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -150,9 +150,15 @@ def ArithToAMDGPUConversionPass : Pass<"convert-arith-to-amdgpu"> { let dependentDialects = ["amdgpu::AMDGPUDialect", "vector::VectorDialect"]; let options = [ + Option<"chipset", "chipset", "std::string", + /*default=*/"\"gfx000\"", + "Chipset that these operations will run on">, Option<"saturateFP8Truncf", "saturate-fp8-truncf", "bool", /*default=*/"false", "Use saturating truncation for 8-bit float types">, + Option<"allowPackedF16Rtz", "allow-packed-f16-round-to-zero", "bool", + /*default=*/"false", + "Whether we should allow f32->f16 packed round-to-zero conversion">, ]; } diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td index 97e0580c898080..e5c1a53f34bf64 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td +++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td @@ -25,6 +25,7 @@ def AMDGPU_Dialect : Dialect { let dependentDialects = [ + "ROCDL::ROCDLDialect", "arith::ArithDialect", "gpu::GPUDialect" ]; diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td index a1e6fc3e299009..e832dfa9d6b80e 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td @@ -166,7 +166,7 @@ def ROCDL_BallotOp : let summary = "Vote across thread group"; let description = [{ - Ballot provides a bit mask containing the 1-bit predicate value from each lane. + Ballot provides a bit mask containing the 1-bit predicate value from each lane. The nth bit of the result contains the 1 bit contributed by the nth warp lane. }]; @@ -579,6 +579,21 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0], }]; } +//===---------------------------------------------------------------------===// +// 16-bit float intrinsics +//===---------------------------------------------------------------------===// +def ROCDL_CvtPkRtz: + ROCDL_IntrOp<"cvt.pkrtz", [], [], [Pure], 1>, + Arguments<(ins F32:$srcA, F32:$srcB)> { + let summary = "Convert two f32 input into a vector<2xf16>"; + let description = [{ + Convert two f32 values into a packed vector<2xf16>. 
+ }]; + let assemblyFormat = [{ + attr-dict $srcA `,` $srcB `:` type($res) + }]; +} + //===---------------------------------------------------------------------===// // 8-bit float intrinsics //===---------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp index b3798a3f7624b0..d36583c8118ff4 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp +++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp @@ -9,8 +9,11 @@ #include "mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" +#include "mlir/Dialect/AMDGPU/Utils/Chipset.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/PatternMatch.h" @@ -24,6 +27,7 @@ namespace mlir { } // namespace mlir using namespace mlir; +using namespace mlir::amdgpu; namespace { struct ArithToAMDGPUConversionPass final @@ -43,12 +47,25 @@ struct ExtFOnFloat8RewritePattern final : OpRewritePattern { struct TruncFToFloat8RewritePattern final : OpRewritePattern { bool saturateFP8 = false; - TruncFToFloat8RewritePattern(MLIRContext *ctx, bool saturateFP8) - : OpRewritePattern::OpRewritePattern(ctx), saturateFP8(saturateFP8) {} + TruncFToFloat8RewritePattern(MLIRContext *ctx, bool saturateFP8, + Chipset chipset) + : OpRewritePattern::OpRewritePattern(ctx), saturateFP8(saturateFP8), + chipset(chipset) {} + Chipset chipset; LogicalResult match(arith::TruncFOp op) const override; void rewrite(arith::TruncFOp op, PatternRewriter &rewriter) const override; }; + +struct TruncfToFloat16RewritePattern final + : public OpRewritePattern { + + using OpRewritePattern::OpRewritePattern; + + LogicalResult match(arith::TruncFOp op) const override; + void rewrite(arith::TruncFOp op, PatternRewriter &rewriter) const override; +}; + } // end namespace static Value castF32To(Type elementType, Value f32, Location loc, @@ -272,17 +289,105 @@ void TruncFToFloat8RewritePattern::rewrite(arith::TruncFOp op, rewriter.replaceOp(op, result); } +LogicalResult TruncfToFloat16RewritePattern::match(arith::TruncFOp op) const { + Type outType = op.getOut().getType(); + Type inputType = getElementTypeOrSelf(op.getIn()); + if (auto outVecType = dyn_cast(outType)) { + if (outVecType.isScalable()) + return failure(); + outType = outVecType.getElementType(); + } + return success(outType.isF16() && inputType.isF32()); +} + +void TruncfToFloat16RewritePattern::rewrite(arith::TruncFOp op, + PatternRewriter &rewriter) const { + Location loc = op.getLoc(); + Value in = op.getIn(); + Type outElemType = getElementTypeOrSelf(op.getOut().getType()); + VectorType truncResType = VectorType::get(2, outElemType); + auto inVectorTy = dyn_cast(in.getType()); + + // Handle the case where input type is not a vector type + if (!inVectorTy) { + auto sourceB = rewriter.create(loc, rewriter.getF32Type()); + Value asF16s = + rewriter.create(loc, truncResType, in, sourceB); + Value result = rewriter.create( + loc, asF16s, rewriter.createOrFold(loc, 0)); + return rewriter.replaceOp(op, result); + } + VectorType outType = cast(op.getOut().getType()); + int64_t numElements = outType.getNumElements(); + Value zero = rewriter.createOrFold( + loc, outElemType, rewriter.getFloatAttr(outElemType, 0.0)); + Value result = 
rewriter.createOrFold(loc, outType, zero); + + if (inVectorTy.getRank() > 1) { + inVectorTy = VectorType::get(SmallVector{numElements}, + inVectorTy.getElementType()); + in = rewriter.create(loc, inVectorTy, in); + } + + // Handle the vector case. We also handle the (uncommon) case where the vector + // length is odd + for (int64_t i = 0; i < numElements; i += 2) { + int64_t elemsThisOp = std::min(numElements, i + 2) - i; + Value thisResult = nullptr; + Value elemA = rewriter.create( + loc, in, rewriter.create(loc, i)); + Value elemB = rewriter.create(loc, rewriter.getF32Type()); + + if (elemsThisOp == 2) { + elemB = rewriter.create( + loc, in, rewriter.createOrFold(loc, i + 1)); + } + + thisResult = + rewriter.create(loc, truncResType, elemA, elemB); + // Place back the truncated result into the possibly larger vector. If we + // are operating on a size 2 vector, these operations should be folded away + thisResult = rewriter.create( + loc, thisResult, 0, elemsThisOp, 1); + result = rewriter.create(loc, thisResult, + result, i, 1); + } + + if (inVectorTy.getRank() != outType.getRank()) { + result = rewriter.create(loc, outType, result); + } + + rewriter.replaceOp(op, result); +} + void mlir::arith::populateArithToAMDGPUConversionPatterns( - RewritePatternSet &patterns, bool saturateFP8TruncF) { - patterns.add(patterns.getContext()); - patterns.add(patterns.getContext(), - saturateFP8TruncF); + RewritePatternSet &patterns, bool convertFP8Arithmetic, + bool saturateFP8Truncf, bool allowPackedF16Rtz, Chipset chipset) { + + if (convertFP8Arithmetic) { + patterns.add(patterns.getContext()); + patterns.add(patterns.getContext(), + saturateFP8Truncf, chipset); + } + if (allowPackedF16Rtz) + patterns.add(patterns.getContext()); } void ArithToAMDGPUConversionPass::runOnOperation() { Operation *op = getOperation(); + MLIRContext *ctx = &getContext(); RewritePatternSet patterns(op->getContext()); - arith::populateArithToAMDGPUConversionPatterns(patterns, saturateFP8Truncf); + FailureOr maybeChipset = amdgpu::Chipset::parse(chipset); + if (failed(maybeChipset)) { + emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset); + return signalPassFailure(); + } + + bool convertFP8Arithmetic = + (*maybeChipset).majorVersion == 9 && (*maybeChipset).minorVersion >= 0x40; + arith::populateArithToAMDGPUConversionPatterns( + patterns, convertFP8Arithmetic, saturateFP8Truncf, allowPackedF16Rtz, + *maybeChipset); if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns)))) return signalPassFailure(); } diff --git a/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt b/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt index e2c951b0b34d8b..50be09ab5a7c5b 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/ArithToAMDGPU/CMakeLists.txt @@ -12,6 +12,7 @@ add_mlir_conversion_library(MLIRArithToAMDGPU LINK_LIBS PUBLIC MLIRAMDGPUDialect + MLIRAMDGPUUtils MLIRArithDialect MLIRArithUtils MLIRVectorDialect diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp index c1a785fb25478d..3943696364950f 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp +++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Diagnostics.h" diff --git a/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt 
b/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt index 0551d13b5a0cf0..78d78cf48a747c 100644 --- a/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/AMDGPU/IR/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRAMDGPUDialect LINK_LIBS PUBLIC MLIRArithDialect + MLIRROCDLDialect # Needed for GPU address space enum definition MLIRGPUDialect MLIRIR diff --git a/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir b/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir new file mode 100644 index 00000000000000..121cae26748a82 --- /dev/null +++ b/mlir/test/Conversion/ArithToAMDGPU/16-bit-floats.mlir @@ -0,0 +1,51 @@ +// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu="allow-packed-f16-round-to-zero=true" | FileCheck %s + +// CHECK-LABEL: @scalar_trunc +// CHECK-SAME: (%[[value:.*]]: f32) +func.func @scalar_trunc(%v: f32) -> f16{ + // CHECK: %[[poison:.*]] = llvm.mlir.poison : f32 + // CHECK: %[[trunc:.*]] = rocdl.cvt.pkrtz %[[value]], %[[poison]] : vector<2xf16> + // CHECK: %[[extract:.*]] = vector.extractelement %[[trunc]][%c0 : index] : vector<2xf16> + // CHECK: return %[[extract]] : f16 + %w = arith.truncf %v : f32 to f16 + return %w : f16 +} + +// CHECK-LABEL: @vector_trunc +// CHECK-SAME: (%[[value:.*]]: vector<2xf32>) +func.func @vector_trunc_short(%v: vector<2xf32>) -> vector<2xf16> { + // CHECK: %[[elem0:.*]] = vector.extractelement %[[value]] + // CHECK: %[[elem1:.*]] = vector.extractelement %[[value]] + // CHECK: %[[ret:.*]] = rocdl.cvt.pkrtz %[[elem0]], %[[elem1]] : vector<2xf16> + // CHECK: return %[[ret]] + %w = arith.truncf %v : vector<2xf32> to vector<2xf16> + return %w : vector<2xf16> +} + +// CHECK-LABEL: @vector_trunc_long +// CHECK-SAME: (%[[value:.*]]: vector<9xf32>) +func.func @vector_trunc_long(%v: vector<9xf32>) -> vector<9xf16> { + // CHECK: %[[elem0:.*]] = vector.extractelement %[[value]][%c0 : index] + // CHECK: %[[elem1:.*]] = vector.extractelement %[[value]][%c1 : index] + // CHECK: %[[packed0:.*]] = rocdl.cvt.pkrtz %[[elem0]], %[[elem1]] : vector<2xf16> + // CHECK: %[[out0:.*]] = vector.insert_strided_slice %[[packed0]], {{.*}} {offsets = [0], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem2:.*]] = vector.extractelement %[[value]][%c2 : index] + // CHECK: %[[elem3:.*]] = vector.extractelement %[[value]][%c3 : index] + // CHECK: %[[packed1:.*]] = rocdl.cvt.pkrtz %[[elem2]], %[[elem3]] : vector<2xf16> + // CHECK: %[[out1:.*]] = vector.insert_strided_slice %[[packed1]], %[[out0]] {offsets = [2], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem4:.*]] = vector.extractelement %[[value]][%c4 : index] + // CHECK: %[[elem5:.*]] = vector.extractelement %[[value]][%c5 : index] + // CHECK: %[[packed2:.*]] = rocdl.cvt.pkrtz %[[elem4]], %[[elem5]] : vector<2xf16> + // CHECK: %[[out2:.*]] = vector.insert_strided_slice %[[packed2]], %[[out1]] {offsets = [4], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem6:.*]] = vector.extractelement %[[value]] + // CHECK: %[[elem7:.*]] = vector.extractelement %[[value]] + // CHECK: %[[packed3:.*]] = rocdl.cvt.pkrtz %[[elem6]], %[[elem7]] : vector<2xf16> + // CHECK: %[[out3:.*]] = vector.insert_strided_slice %[[packed3]], %[[out2]] {offsets = [6], strides = [1]} : vector<2xf16> into vector<9xf16> + // CHECK: %[[elem8:.*]] = vector.extractelement %[[value]] + // CHECK: %[[packed4:.*]] = rocdl.cvt.pkrtz %[[elem8:.*]] : vector<2xf16> + // CHECK: %[[slice:.*]] = vector.extract_strided_slice %[[packed4]] {offsets = [0], sizes = [1], strides = [1]} : 
vector<2xf16> to vector<1xf16> + // CHECK: %[[out4:.*]] = vector.insert_strided_slice %[[slice]], %[[out3]] {offsets = [8], strides = [1]} : vector<1xf16> into vector<9xf16> + // CHECK: return %[[out4]] + %w = arith.truncf %v : vector<9xf32> to vector<9xf16> + return %w : vector<9xf16> +} diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir b/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir index c7f39440a349b0..cd921da2294e13 100644 --- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir +++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-float-saturation.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt --split-input-file %s \ -// RUN: --pass-pipeline='builtin.module(func.func(convert-arith-to-amdgpu{saturate-fp8-truncf=true}))' \ +// RUN: --pass-pipeline='builtin.module(func.func(convert-arith-to-amdgpu{chipset=gfx940 saturate-fp8-truncf=true}))' \ // RUN: | FileCheck %s // CHECK-LABEL: func.func @scalar_trunc diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir index 26a222a4a788e5..bd90facb615440 100644 --- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir +++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu | FileCheck %s +// RUN: mlir-opt --split-input-file %s -convert-arith-to-amdgpu="chipset=gfx940" | FileCheck %s // CHECK-LABEL: func.func @scalar_ext // CHECK-SAME: ([[V:%.+]]: f8E5M2FNUZ) diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir index 64bcb5bdb255db..d902a82eeb9ea2 100644 --- a/mlir/test/Target/LLVMIR/rocdl.mlir +++ b/mlir/test/Target/LLVMIR/rocdl.mlir @@ -530,6 +530,12 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 { llvm.return %source5 : i32 } +llvm.func @rocdl_16bit_packed_floats(%sourceA: f32, %sourceB: f32) -> vector<2xf16> { + // CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float {{.*}}, float {{.*}}) + %source = rocdl.cvt.pkrtz %sourceA, %sourceB : vector<2xf16> + llvm.return %source : vector<2xf16> +} + // CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" } // CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024" // CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128" From 924a7d83b4287b3b85dd1ca29d2d3e1f0a10ea68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kim=20Gr=C3=A4sman?= Date: Mon, 26 Aug 2024 19:49:56 +0200 Subject: [PATCH 52/65] Use CLANG_RESOURCE_DIR more consistently (#103388) When Clang is consumed as a library, the CLANG_RESOURCE_DIR definition is not exported from the CMake system, so external clients will be unable to compute the same resource dir as Clang itself would, because they don't know what to pass for the optional CustomResourceDir argument. All call sites except one would pass CLANG_RESOURCE_DIR to Driver::GetResourcesPath. It seems the one exception in libclang CIndexer was an oversight. Move the use of CLANG_RESOURCE_DIR into GetResourcesPath and remove the optional argument to avoid this inconsistency between internal and external clients. 
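
For illustration, here is roughly what an out-of-tree consumer of clangDriver can now do to compute the same resource directory as the clang binary itself; the tool name and wrapper function are hypothetical, only the two library calls are real:

```cpp
// Hypothetical out-of-tree tool; only the clang/llvm calls are real APIs.
#include "clang/Driver/Driver.h"
#include "llvm/Support/FileSystem.h"

#include <string>

static std::string getToolResourceDir(const char *Argv0, void *MainAddr) {
  // Absolute path of the running binary, e.g. <prefix>/bin/mytool.
  std::string BinaryPath = llvm::sys::fs::getMainExecutable(Argv0, MainAddr);
  // CLANG_RESOURCE_DIR is now applied inside GetResourcesPath, so this result
  // matches the directory the clang driver itself computes for the same prefix.
  return clang::driver::Driver::GetResourcesPath(BinaryPath);
}
```

Before this change, such a client would have had to guess a value for the removed second argument and could silently diverge from the directory clang was configured with.
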
--- clang/include/clang/Driver/Driver.h | 3 +-- clang/lib/Driver/Driver.cpp | 14 +++++++------- clang/lib/Frontend/CompilerInvocation.cpp | 2 +- .../Plugins/ExpressionParser/Clang/ClangHost.cpp | 2 +- lldb/unittests/Expression/ClangParserTest.cpp | 4 ++-- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 84eadd42880a50..9177d56718ee77 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -379,8 +379,7 @@ class Driver { /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. - static std::string GetResourcesPath(StringRef BinaryPath, - StringRef CustomResourceDir = ""); + static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, DiagnosticsEngine &Diags, std::string Title = "clang LLVM compiler", diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e12416e51f8d24..43002add33774b 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -171,18 +171,18 @@ getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { } // static -std::string Driver::GetResourcesPath(StringRef BinaryPath, - StringRef CustomResourceDir) { +std::string Driver::GetResourcesPath(StringRef BinaryPath) { // Since the resource directory is embedded in the module hash, it's important // that all places that need it call this function, so that they get the // exact same string ("a/../b/" and "b/" get different hashes, for example). // Dir is bin/ or lib/, depending on where BinaryPath is. - std::string Dir = std::string(llvm::sys::path::parent_path(BinaryPath)); - + StringRef Dir = llvm::sys::path::parent_path(BinaryPath); SmallString<128> P(Dir); - if (CustomResourceDir != "") { - llvm::sys::path::append(P, CustomResourceDir); + + StringRef ConfiguredResourceDir(CLANG_RESOURCE_DIR); + if (!ConfiguredResourceDir.empty()) { + llvm::sys::path::append(P, ConfiguredResourceDir); } else { // On Windows, libclang.dll is in bin/. // On non-Windows, libclang.so/.dylib is in lib/. @@ -239,7 +239,7 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, #endif // Compute the path to the resource directory. 
- ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); + ResourceDir = GetResourcesPath(ClangExecutable); } void Driver::setDriverMode(StringRef Value) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0bb4175dd021ee..32628c5e84332d 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3130,7 +3130,7 @@ std::string CompilerInvocation::GetResourcesPath(const char *Argv0, void *MainAddr) { std::string ClangExecutable = llvm::sys::fs::getMainExecutable(Argv0, MainAddr); - return Driver::GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); + return Driver::GetResourcesPath(ClangExecutable); } static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp index 6064c02c7fd67d..6de851081598fd 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangHost.cpp @@ -53,7 +53,7 @@ static bool DefaultComputeClangResourceDirectory(FileSpec &lldb_shlib_spec, std::string raw_path = lldb_shlib_spec.GetPath(); llvm::StringRef parent_dir = llvm::sys::path::parent_path(raw_path); static const std::string clang_resource_path = - clang::driver::Driver::GetResourcesPath("bin/lldb", CLANG_RESOURCE_DIR); + clang::driver::Driver::GetResourcesPath("bin/lldb"); static const llvm::StringRef kResourceDirSuffixes[] = { // LLVM.org's build of LLDB uses the clang resource directory placed diff --git a/lldb/unittests/Expression/ClangParserTest.cpp b/lldb/unittests/Expression/ClangParserTest.cpp index 6f682f6c97fdb5..fab4487c737195 100644 --- a/lldb/unittests/Expression/ClangParserTest.cpp +++ b/lldb/unittests/Expression/ClangParserTest.cpp @@ -42,8 +42,8 @@ TEST_F(ClangHostTest, ComputeClangResourceDirectory) { #else std::string path_to_liblldb = "C:\\foo\\bar\\lib\\"; #endif - std::string path_to_clang_dir = clang::driver::Driver::GetResourcesPath( - path_to_liblldb + "liblldb", CLANG_RESOURCE_DIR); + std::string path_to_clang_dir = + clang::driver::Driver::GetResourcesPath(path_to_liblldb + "liblldb"); llvm::SmallString<256> path_to_clang_lib_dir_real; llvm::sys::fs::real_path(path_to_clang_dir, path_to_clang_lib_dir_real); From f099f76bb2a55bb6a90b30b81bae9f55ea37fcb5 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:51:24 -0700 Subject: [PATCH 53/65] [flang] Handle pp-directives better in line continuation (#105572) The code for detecting and processing some preprocessing directives (conditional compilation and #line) while skipping comments between one source or compiler directive line and its continuations wasn't correctly handling the case of such a directive following an explicit ampersand. Fixes https://github.com/llvm/llvm-project/issues/100730 and https://github.com/llvm/llvm-project/issues/100345. 
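
For reference, the problematic shape is a compiler directive continued with an explicit '&' where the continuation lines are selected by conditional compilation; a reduced sketch of the pattern exercised by the updated line-in-contin.F90 test (macro name assumed undefined):

```fortran
!$omp parallel do &
#ifdef NOT_DEFINED
!$omp garbage &
#else
!$omp default(none) &
#endif
!$omp private(j)
  do j = 1, 100
  end do
!$omp end parallel do
```

The #ifdef/#else/#endif block between the directive line and its continuations must be processed so that only the active branch contributes to the continued directive.
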
--- flang/lib/Parser/prescan.cpp | 60 ++++++++++----------- flang/test/Preprocessing/line-in-contin.F90 | 24 +++++++-- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index c01d512b4653de..804ada7d11e020 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -207,11 +207,13 @@ void Prescanner::Statement() { toks.Put(id, GetProvenance(at_)); if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) { auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())}; - disableSourceContinuation_ = - newLineClass.kind != LineClassification::Kind::Source; if (newLineClass.kind == LineClassification::Kind::CompilerDirective) { directiveSentinel_ = newLineClass.sentinel; + disableSourceContinuation_ = false; + } else { + disableSourceContinuation_ = + newLineClass.kind != LineClassification::Kind::Source; } } } @@ -1114,39 +1116,33 @@ bool Prescanner::SkipCommentLine(bool afterAmpersand) { SkipToEndOfLine(); omitNewline_ = true; } - return false; - } - auto lineClass{ClassifyLine(nextLine_)}; - if (lineClass.kind == LineClassification::Kind::Comment) { - NextLine(); - return true; } else if (inPreprocessorDirective_) { - return false; - } else if (afterAmpersand && - (lineClass.kind == - LineClassification::Kind::ConditionalCompilationDirective || - lineClass.kind == LineClassification::Kind::DefinitionDirective || - lineClass.kind == LineClassification::Kind::PreprocessorDirective || - lineClass.kind == LineClassification::Kind::IncludeDirective || - lineClass.kind == LineClassification::Kind::IncludeLine)) { - SkipToEndOfLine(); - omitNewline_ = true; - skipLeadingAmpersand_ = true; - return false; - } else if (lineClass.kind == - LineClassification::Kind::ConditionalCompilationDirective || - lineClass.kind == LineClassification::Kind::PreprocessorDirective) { - // Allow conditional compilation directives (e.g., #ifdef) to affect - // continuation lines. - // Allow other preprocessor directives, too, except #include - // (when it does not follow '&'), #define, and #undef (because - // they cannot be allowed to affect preceding text on a - // continued line). - preprocessor_.Directive(TokenizePreprocessorDirective(), *this); - return true; } else { - return false; + auto lineClass{ClassifyLine(nextLine_)}; + if (lineClass.kind == LineClassification::Kind::Comment) { + NextLine(); + return true; + } else if (lineClass.kind == + LineClassification::Kind::ConditionalCompilationDirective || + lineClass.kind == LineClassification::Kind::PreprocessorDirective) { + // Allow conditional compilation directives (e.g., #ifdef) to affect + // continuation lines. + // Allow other preprocessor directives, too, except #include + // (when it does not follow '&'), #define, and #undef (because + // they cannot be allowed to affect preceding text on a + // continued line). 
+ preprocessor_.Directive(TokenizePreprocessorDirective(), *this); + return true; + } else if (afterAmpersand && + (lineClass.kind == LineClassification::Kind::DefinitionDirective || + lineClass.kind == LineClassification::Kind::IncludeDirective || + lineClass.kind == LineClassification::Kind::IncludeLine)) { + SkipToEndOfLine(); + omitNewline_ = true; + skipLeadingAmpersand_ = true; + } } + return false; } const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { diff --git a/flang/test/Preprocessing/line-in-contin.F90 b/flang/test/Preprocessing/line-in-contin.F90 index 138e579bffaa28..28efbd02d3ae89 100644 --- a/flang/test/Preprocessing/line-in-contin.F90 +++ b/flang/test/Preprocessing/line-in-contin.F90 @@ -1,8 +1,10 @@ -! RUN: %flang_fc1 -E %s 2>&1 | FileCheck %s -! CHECK: call foo( 0.) -! CHECK: call foo( 1.) -! CHECK: call foo( 2.) -! CHECK: call foo( 3.) +! RUN: %flang_fc1 -fopenmp -E %s 2>&1 | FileCheck %s +! CHECK: call foo(0.) +! CHECK: call foo(1.) +! CHECK: call foo(2.) +! CHECK: call foo(3.) +! CHECK: !$omp parallel do default(none) private(j) +! CHECK: !$omp end parallel do call foo( & # 100 "bar.h" & 0.) @@ -17,4 +19,16 @@ # 103 "bar.h" & 3. & ) +!$omp parallel do & +#ifdef undef +!$omp garbage & +#else +!$omp default(none) & +#endif +!$omp private(j) + do j=1,100 + end do +!$omp end & +# 104 "bar.h" +!$omp parallel do end From 2326a02357c74a1a913a3d572bf789d4d48af7f0 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:52:19 -0700 Subject: [PATCH 54/65] [flang] Support read-only access to an anonymous unit (#105859) Don't require the "fort.123" file implicitly opened by READ(123, ... to be writable. --- flang/runtime/external-unit.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/flang/runtime/external-unit.cpp b/flang/runtime/external-unit.cpp index 8009151a8a370c..d17a92622f8448 100644 --- a/flang/runtime/external-unit.cpp +++ b/flang/runtime/external-unit.cpp @@ -65,9 +65,13 @@ ExternalFileUnit *ExternalFileUnit::LookUpOrCreateAnonymous(int unit, bool exists{false}; ExternalFileUnit *result{GetUnitMap().LookUpOrCreate(unit, handler, exists)}; if (result && !exists) { + common::optional action; + if (dir == Direction::Output) { + action = Action::ReadWrite; + } if (!result->OpenAnonymousUnit( dir == Direction::Input ? OpenStatus::Unknown : OpenStatus::Replace, - Action::ReadWrite, Position::Rewind, Convert::Unknown, handler)) { + action, Position::Rewind, Convert::Unknown, handler)) { // fort.N isn't a writable file if (ExternalFileUnit * closed{LookUpForClose(result->unitNumber())}) { closed->DestroyClosed(); From b52728d89bb44ec59fa60ec02e1a9cbdb86037e1 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:53:17 -0700 Subject: [PATCH 55/65] [flang] Silence warning when inappropriate (#105867) When a function returns an array, using an element of that array is an actual argument in a procedure reference with an implicit interface should suffice to avoid a warning about an undefined function result. 
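
Concretely, in a case like the reduced example below (mirroring the new defdByElementArgToImplicit test), the call to `define` has an implicit interface, so `r(1)` may be defined by it and no undefined-result warning should be emitted:

```fortran
function f() result(r)
  real r(1)
  call define(r(1)) ! implicit interface; may define an element of the result
end
```
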
--- flang/lib/Semantics/check-call.cpp | 8 ++++---- flang/test/Semantics/undef-result01.f90 | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 4708d51d3af4dd..c7ec8733655648 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -56,6 +56,10 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, "%VAL argument must be a scalar numeric or logical expression"_err_en_US); } if (const auto *expr{arg.UnwrapExpr()}) { + if (const Symbol * base{GetFirstSymbol(*expr)}; + base && IsFunctionResult(*base)) { + context.NoteDefinedSymbol(*base); + } if (IsBOZLiteral(*expr)) { messages.Say("BOZ argument requires an explicit interface"_err_en_US); } else if (evaluate::IsNullPointer(*expr)) { @@ -79,10 +83,6 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, messages.Say( "VOLATILE argument requires an explicit interface"_err_en_US); } - if (const Symbol & base{named->GetFirstSymbol()}; - IsFunctionResult(base)) { - context.NoteDefinedSymbol(base); - } } else if (auto argChars{characteristics::DummyArgument::FromActual( "actual argument", *expr, context.foldingContext(), /*forImplicitInterface=*/true)}) { diff --git a/flang/test/Semantics/undef-result01.f90 b/flang/test/Semantics/undef-result01.f90 index bf6af11a8d7b92..08e7fe1e448998 100644 --- a/flang/test/Semantics/undef-result01.f90 +++ b/flang/test/Semantics/undef-result01.f90 @@ -148,3 +148,8 @@ function defdByAssociate() s = 1. end associate end + +function defdByElementArgToImplicit() result(r) + real r(1) + call define(r(1)) +end From 047168dae79cd6e0087eb86810006c635f017df6 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:54:03 -0700 Subject: [PATCH 56/65] [flang] Fix parser crash (#105875) The production for a bare file unit number in an I/O statement checks that the scalar integer expression isn't followed by "=", in order to disambiguate FLUSHN from FLUSHN=1, and to not treat a control specifier keyword as an integer expression. The implementation of this check used !"="_tok, which has the side effect of producing no error message; this can lead to a parsing crash later when a failed parse of an erroneous program is found to have produced no errors. Rewrite as a lookAhead call for those characters that acually can follow a bare unit number. Fixes https://github.com/llvm/llvm-project/issues/105779. --- flang/lib/Parser/io-parsers.cpp | 3 ++- flang/test/Parser/recovery05.f90 | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 flang/test/Parser/recovery05.f90 diff --git a/flang/lib/Parser/io-parsers.cpp b/flang/lib/Parser/io-parsers.cpp index ca0dbedc8da427..25b09efd40c529 100644 --- a/flang/lib/Parser/io-parsers.cpp +++ b/flang/lib/Parser/io-parsers.cpp @@ -27,7 +27,8 @@ TYPE_PARSER(construct(variable / lookAhead(space / ",);\n"_ch)) || construct(fileUnitNumber) || construct(star)) // R1202 file-unit-number -> scalar-int-expr -TYPE_PARSER(construct(scalarIntExpr / !"="_tok)) +TYPE_PARSER(construct( + scalarIntExpr / (lookAhead(space >> ",)"_ch) || atEndOfStmt))) // R1204 open-stmt -> OPEN ( connect-spec-list ) TYPE_CONTEXT_PARSER("OPEN statement"_en_US, diff --git a/flang/test/Parser/recovery05.f90 b/flang/test/Parser/recovery05.f90 new file mode 100644 index 00000000000000..9c8c3689b27bd5 --- /dev/null +++ b/flang/test/Parser/recovery05.f90 @@ -0,0 +1,5 @@ +! 
RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s +continue +! CHECK: error: expected end of statement +flush iostat=1 +end From f428f5fc680cfd4d72234ad078c85e868e1ac7a0 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:54:29 -0700 Subject: [PATCH 57/65] [flang][runtime] Add alternate SELECTED_(INT|REAL)_KIND APIs (#105887) Add extended versions of SELECTED_INT_KIND and SELECTED_REAL_KIND runtime APIs that permit lowering to pass along a bit mask of acceptable kinds. The existing APIs call the new ones with a full bit mask. If lowering transitions to always use the new APIs the old ones can then be deleted. --- flang/include/flang/Runtime/numeric.h | 4 ++ flang/runtime/numeric.cpp | 94 +++++++++++++++------------ 2 files changed, 55 insertions(+), 43 deletions(-) diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h index e051e864316630..6e1979790e3c61 100644 --- a/flang/include/flang/Runtime/numeric.h +++ b/flang/include/flang/Runtime/numeric.h @@ -377,6 +377,8 @@ CppTypeFor RTDECL(SelectedCharKind)( // SELECTED_INT_KIND CppTypeFor RTDECL(SelectedIntKind)( const char *, int, void *, int); +CppTypeFor RTDECL(SelectedIntKindMasked)( + const char *, int, void *, int, int); // SELECTED_LOGICAL_KIND CppTypeFor RTDECL(SelectedLogicalKind)( @@ -385,6 +387,8 @@ CppTypeFor RTDECL(SelectedLogicalKind)( // SELECTED_REAL_KIND CppTypeFor RTDECL(SelectedRealKind)( const char *, int, void *, int, void *, int, void *, int); +CppTypeFor RTDECL(SelectedRealKindMasked)( + const char *, int, void *, int, void *, int, void *, int, int); // SPACING CppTypeFor RTDECL(Spacing4)( diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp index 40bacf07157a27..b5e0851a16cd1e 100644 --- a/flang/runtime/numeric.cpp +++ b/flang/runtime/numeric.cpp @@ -95,20 +95,22 @@ template inline RT_API_ATTRS T Scale(T x, std::int64_t p) { } // SELECTED_INT_KIND (16.9.169) -template -inline RT_API_ATTRS CppTypeFor SelectedIntKind(T x) { - if (x <= 2) { +template +inline RT_API_ATTRS CppTypeFor SelectedIntKind( + X x, M mask) { +#if !defined __SIZEOF_INT128__ || defined FLANG_RUNTIME_NO_INTEGER_16 + mask &= ~(1 << 16); +#endif + if (x <= 2 && (mask & (1 << 1))) { return 1; - } else if (x <= 4) { + } else if (x <= 4 && (mask & (1 << 2))) { return 2; - } else if (x <= 9) { + } else if (x <= 9 && (mask & (1 << 4))) { return 4; - } else if (x <= 18) { + } else if (x <= 18 && (mask & (1 << 8))) { return 8; -#if defined __SIZEOF_INT128__ && !defined FLANG_RUNTIME_NO_INTEGER_16 - } else if (x <= 38) { + } else if (x <= 38 && (mask & (1 << 16))) { return 16; -#endif } return -1; } @@ -130,60 +132,52 @@ inline RT_API_ATTRS CppTypeFor SelectedLogicalKind( } // SELECTED_REAL_KIND (16.9.170) -template +template inline RT_API_ATTRS CppTypeFor SelectedRealKind( - P p, R r, D d) { + P p, R r, D d, M mask) { if (d != 2) { return -5; } - -#ifndef FLANG_RUNTIME_NO_REAL_2 - constexpr bool hasReal2{true}; -#else - constexpr bool hasReal2{false}; +#ifdef FLANG_RUNTIME_NO_REAL_2 + mask &= ~(1 << 2); #endif -#ifndef FLANG_RUNTIME_NO_REAL_3 - constexpr bool hasReal3{true}; -#else - constexpr bool hasReal3{false}; +#ifdef FLANG_RUNTIME_NO_REAL_3 + mask &= ~(1 << 3); #endif -#if defined LDBL_MANT_DIG == 64 && !defined FLANG_RUNTIME_NO_REAL_10 - constexpr bool hasReal10{true}; -#else - constexpr bool hasReal10{false}; +#if LDBL_MANT_DIG < 64 || defined FLANG_RUNTIME_NO_REAL_10 + mask &= ~(1 << 10); #endif -#if (LDBL_MANT_DIG == 64 || 
LDBL_MANT_DIG == 113) && \ - !defined FLANG_RUNTIME_NO_REAL_16 - constexpr bool hasReal16{true}; -#else - constexpr bool hasReal16{false}; +#if LDBL_MANT_DIG < 64 || defined FLANG_RUNTIME_NO_REAL_16 + mask &= ~(1 << 16); #endif int error{0}; int kind{0}; - if (hasReal2 && p <= 3) { + if (p <= 3 && (mask & (1 << 2))) { kind = 2; - } else if (p <= 6) { + } else if (p <= 6 && (mask & (1 << 4))) { kind = 4; - } else if (p <= 15) { + } else if (p <= 15 && (mask & (1 << 8))) { kind = 8; - } else if (hasReal10 && p <= 18) { + } else if (p <= 18 && (mask & (1 << 10))) { kind = 10; - } else if (hasReal16 && p <= 33) { + } else if (p <= 33 && (mask & (1 << 16))) { kind = 16; } else { error -= 1; } - if (r <= 4) { - kind = kind < 2 ? (hasReal2 ? 2 : 4) : kind; - } else if (r <= 37) { - kind = kind < 3 ? (hasReal3 && p != 3 ? 3 : 4) : kind; - } else if (r <= 307) { + if (r <= 4 && (mask & (1 << 2))) { + kind = kind < 2 ? 2 : kind; + } else if (r <= 37 && p != 3 && (mask & (1 << 3))) { + kind = kind < 3 ? 3 : kind; + } else if (r <= 37 && (mask & (1 << 4))) { + kind = kind < 4 ? 4 : kind; + } else if (r <= 307 && (mask & (1 << 8))) { kind = kind < 8 ? 8 : kind; - } else if (hasReal10 && r <= 4931) { + } else if (r <= 4931 && (mask & (1 << 10))) { kind = kind < 10 ? 10 : kind; - } else if (hasReal16 && r <= 4931) { + } else if (r <= 4931 && (mask & (1 << 16))) { kind = kind < 16 ? 16 : kind; } else { error -= 2; @@ -790,6 +784,12 @@ CppTypeFor RTDEF(SelectedCharKind)( // SELECTED_INT_KIND CppTypeFor RTDEF(SelectedIntKind)( const char *source, int line, void *x, int xKind) { + return RTNAME(SelectedIntKindMasked)(source, line, x, xKind, + (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8) | (1 << 16)); +} + +CppTypeFor RTDEF(SelectedIntKindMasked)( + const char *source, int line, void *x, int xKind, int mask) { #ifdef __SIZEOF_INT128__ CppTypeFor r = GetIntArgValue>( @@ -798,7 +798,7 @@ CppTypeFor RTDEF(SelectedIntKind)( std::int64_t r = GetIntArgValue( source, line, x, xKind, /*defaultValue*/ 0, /*resKind*/ 8); #endif - return SelectedIntKind(r); + return SelectedIntKind(r, mask); } // SELECTED_LOGICAL_KIND @@ -819,6 +819,14 @@ CppTypeFor RTDEF(SelectedLogicalKind)( CppTypeFor RTDEF(SelectedRealKind)(const char *source, int line, void *precision, int pKind, void *range, int rKind, void *radix, int dKind) { + return RTNAME(SelectedRealKindMasked)(source, line, precision, pKind, range, + rKind, radix, dKind, + (1 << 2) | (1 << 3) | (1 << 4) | (1 << 8) | (1 << 10) | (1 << 16)); +} + +CppTypeFor RTDEF(SelectedRealKindMasked)( + const char *source, int line, void *precision, int pKind, void *range, + int rKind, void *radix, int dKind, int mask) { #ifdef __SIZEOF_INT128__ CppTypeFor p = GetIntArgValue>( @@ -837,7 +845,7 @@ CppTypeFor RTDEF(SelectedRealKind)(const char *source, std::int64_t d = GetIntArgValue( source, line, radix, dKind, /*defaultValue*/ 2, /*resKind*/ 8); #endif - return SelectedRealKind(p, r, d); + return SelectedRealKind(p, r, d, mask); } CppTypeFor RTDEF(Spacing4)( From c4b7c47fa53ee6d75b64737d475e5dbd1ed4a409 Mon Sep 17 00:00:00 2001 From: Fabio D'Urso Date: Mon, 26 Aug 2024 19:55:02 +0200 Subject: [PATCH 58/65] [scudo] Fix expectation in ScudoTimingTest.VerifyMax (#106062) --- compiler-rt/lib/scudo/standalone/tests/timing_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp index 23f0a02ea4277b..a762aee48f7c63 100644 --- 
a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp @@ -154,7 +154,7 @@ TEST_F(ScudoTimingTest, VerifyMax) { unsigned long long MaxNs = std::strtoull(&end[6], &end, 10); ASSERT_TRUE(end != nullptr); - EXPECT_GT(MaxNs, AvgNs); + EXPECT_GE(MaxNs, AvgNs); } TEST_F(ScudoTimingTest, VerifyMultipleTimerCalls) { From 961a138237947daa5ccabfc87dbfbad8b47146e4 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:56:37 -0700 Subject: [PATCH 59/65] [flang] Silence spurious error (#106086) Don't attempt to give an object a default binding label when it shows up in a declaration after it has already been given an explicit binding label in an earlier declaration. Fixes https://github.com/llvm/llvm-project/issues/106019. --- flang/lib/Semantics/resolve-names.cpp | 3 +++ flang/test/Semantics/declarations03.f90 | 3 +++ 2 files changed, 6 insertions(+) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index c0478fd4390076..ec8f854f64d103 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1797,6 +1797,9 @@ void AttrsVisitor::SetBindNameOn(Symbol &symbol) { } auto last{label->find_last_not_of(" ")}; label = label->substr(first, last - first + 1); + } else if (symbol.GetIsExplicitBindName()) { + // don't try to override explicit binding name with default + return; } else if (ClassifyProcedure(symbol) == ProcedureDefinitionClass::Internal) { // BIND(C) does not give an implicit binding label to internal procedures. return; diff --git a/flang/test/Semantics/declarations03.f90 b/flang/test/Semantics/declarations03.f90 index 65b07e7d5c6567..8e6f0a4aaf6bd6 100644 --- a/flang/test/Semantics/declarations03.f90 +++ b/flang/test/Semantics/declarations03.f90 @@ -50,6 +50,9 @@ module m !ERROR: BIND_C attribute was already specified on 's5' integer, bind(c, name="ss2") :: s5 + integer, bind(c, name="s6explicit") :: s6 + dimension s6(10) ! caused spurious error + end subroutine common1() From 7cc789bcfba8050eb20ecb8a24508d9a4711dba0 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:57:00 -0700 Subject: [PATCH 60/65] [flang] Silence spurious errors from benign USE errors (#106097) When USE association encounters a conflict that can't be resolved, it produces a "UseError" symbol that will trigger an error message if that symbol is ever actually used. UseError symbols that aren't used are benign. Ensure that UseError symbols don't run the gamut of declaration checking. They were getting through, and could lead to spurious error messages. Fixes https://github.com/llvm/llvm-project/issues/106020. 
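For example, a conflict of the benign kind looks like the following sketch (module and variable names are purely illustrative): both modules export the same name, but the ambiguous use-associated name is never referenced, so the resulting UseError symbol should simply be skipped by declaration checking.

  module m1
    integer :: x = 1
  end module
  module m2
    real :: x = 2.
  end module
  subroutine s
    use m1
    use m2
    ! 'x' is ambiguous here and is represented by a UseError symbol; since
    ! it is never referenced in this subroutine, the symbol is benign and
    ! must not produce spurious declaration-checking errors.
  end subroutine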
--- flang/lib/Semantics/check-declarations.cpp | 3 +++ flang/test/Semantics/resolve82.f90 | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index de3fa8794caedf..734c34276b13b9 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -256,6 +256,9 @@ static bool IsBlockData(const Symbol &symbol) { } void CheckHelper::Check(const Symbol &symbol) { + if (symbol.has()) { + return; + } if (symbol.name().size() > common::maxNameLen && &symbol == &symbol.GetUltimate()) { if (context_.ShouldWarn(common::LanguageFeature::LongNames)) { diff --git a/flang/test/Semantics/resolve82.f90 b/flang/test/Semantics/resolve82.f90 index 88339742efdb36..989ce1d837c705 100644 --- a/flang/test/Semantics/resolve82.f90 +++ b/flang/test/Semantics/resolve82.f90 @@ -34,6 +34,7 @@ end function procFunc real y common /blk/ y protected y + logical,protected,external,pointer :: z contains @@ -60,3 +61,8 @@ subroutine testProcDecl(arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) end subroutine testProcDecl end module m + +subroutine subb() + !Ensure no spurious error from a benign UseError + use m, testProcDecl=>z +end From 4e30cf7b2a94b502abb10c400255547e50f79648 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 26 Aug 2024 11:02:05 -0700 Subject: [PATCH 61/65] [LTO] Introduce getSourceModules (NFC) (#105955) This patch introduces getSourceModules to compute the list of source modules in the ascending alphabetical order. The new function is intended to hide implementation details of ImportMapTy while simplifying FunctionImporter::importFunctions a little bit. --- llvm/include/llvm/Transforms/IPO/FunctionImport.h | 4 ++++ llvm/lib/Transforms/IPO/FunctionImport.cpp | 15 +++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index 93d831c26938bb..b5b969220df85b 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -139,6 +139,10 @@ class FunctionImporter { maybeAddDeclaration(FromModule, GUID); } + // Return the list of source modules sorted in the ascending alphabetical + // order. + SmallVector getSourceModules() const; + const ImportMapTyImpl &getImportMap() const { return ImportMap; } private: diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 74a71cbf101b5d..dd01d143b066b9 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -352,6 +352,13 @@ void FunctionImporter::ImportMapTy::maybeAddDeclaration( ImportMap[FromModule].try_emplace(GUID, GlobalValueSummary::Declaration); } +SmallVector +FunctionImporter::ImportMapTy::getSourceModules() const { + SmallVector Modules(make_first_range(ImportMap)); + llvm::sort(Modules); + return Modules; +} + /// Import globals referenced by a function or other globals that are being /// imported, if importing such global is possible. 
class GlobalsImporter final { @@ -1770,11 +1777,6 @@ Expected FunctionImporter::importFunctions( unsigned ImportedCount = 0, ImportedGVCount = 0; IRMover Mover(DestModule); - // Do the actual import of functions now, one Module at a time - std::set ModuleNameOrderedList; - for (const auto &FunctionsToImportPerModule : ImportList.getImportMap()) { - ModuleNameOrderedList.insert(FunctionsToImportPerModule.first); - } auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType, GlobalValue::GUID GUID) @@ -1785,7 +1787,8 @@ Expected FunctionImporter::importFunctions( return Iter->second; }; - for (const auto &Name : ModuleNameOrderedList) { + // Do the actual import of functions now, one Module at a time + for (const auto &Name : ImportList.getSourceModules()) { // Get the module for the import const auto &FunctionsToImportPerModule = ImportList.getImportMap().find(Name); From 7fc67b5eb53e3b99a99406ae39fba71c3f5538de Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 26 Aug 2024 20:11:43 +0200 Subject: [PATCH 62/65] [bazel] Port 1387ba48a312b6e9b174d850f8c9a1322f44c623 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ddb08f12f04976..866bd5ed6fd3e6 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1534,6 +1534,7 @@ cc_library( ":BytecodeOpInterface", ":GPUDialect", ":IR", + ":ROCDLDialect", ":SideEffectInterfaces", "//llvm:Support", ], @@ -8581,11 +8582,14 @@ cc_library( includes = ["include"], deps = [ ":AMDGPUDialect", + ":AMDGPUUtils", ":ArithDialect", ":ArithUtils", ":ConversionPassIncGen", ":IR", + ":LLVMDialect", ":Pass", + ":ROCDLDialect", ":Support", ":TransformUtils", ":VectorDialect", From 9b00ef5261b69541f36334308690420c99fd89f1 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Mon, 26 Aug 2024 11:26:47 -0700 Subject: [PATCH 63/65] Revert "Add unit tests for size returning new funcs in the MemProf use pass. (#105473)" (#106114) This reverts commit 2e426fe8ff314c2565073e73e27fdbdf36c140a3. --- .../llvm/ProfileData/InstrProfReader.h | 8 +- .../Transforms/Instrumentation/MemProfiler.h | 19 +-- .../Instrumentation/MemProfiler.cpp | 42 ++--- .../Transforms/Instrumentation/CMakeLists.txt | 1 - .../Instrumentation/MemProfilerTest.cpp | 158 ------------------ 5 files changed, 29 insertions(+), 199 deletions(-) delete mode 100644 llvm/unittests/Transforms/Instrumentation/MemProfilerTest.cpp diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 95c891442fd6e9..3b307d08359980 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -670,11 +670,10 @@ class IndexedMemProfReader { public: IndexedMemProfReader() = default; - virtual ~IndexedMemProfReader() = default; Error deserialize(const unsigned char *Start, uint64_t MemProfOffset); - virtual Expected + Expected getMemProfRecord(const uint64_t FuncNameHash) const; }; @@ -769,14 +768,11 @@ class IndexedInstrProfReader : public InstrProfReader { uint64_t *MismatchedFuncSum = nullptr); /// Return the memprof record for the function identified by - /// llvm::md5(Name). Marked virtual so that unit tests can mock this function. + /// llvm::md5(Name). 
Expected getMemProfRecord(uint64_t FuncNameHash) { return MemProfReader.getMemProfRecord(FuncNameHash); } - /// Return the underlying memprof reader. - IndexedMemProfReader &getIndexedMemProfReader() { return MemProfReader; } - /// Fill Counts with the profile data for the given function name. Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index c5d03c98f41581..f92c6b4775a2a2 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -13,15 +13,15 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H #include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" -#include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/Support/VirtualFileSystem.h" namespace llvm { class Function; class Module; -class TargetLibraryInfo; + +namespace vfs { +class FileSystem; +} // namespace vfs /// Public interface to the memory profiler pass for instrumenting code to /// profile memory accesses. @@ -52,17 +52,6 @@ class MemProfUsePass : public PassInfoMixin { IntrusiveRefCntPtr FS = nullptr); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - struct AllocMatchInfo { - uint64_t TotalSize = 0; - AllocationType AllocType = AllocationType::None; - bool Matched = false; - }; - - void - readMemprof(Function &F, const IndexedMemProfReader &MemProfReader, - const TargetLibraryInfo &TLI, - std::map &FullStackIdToAllocMatchInfo); - private: std::string MemoryProfileFileName; IntrusiveRefCntPtr FS; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index bd10c037ecf4ad..4a43120c9a9e7f 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/HashBuilder.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -54,7 +55,6 @@ namespace llvm { extern cl::opt PGOWarnMissing; extern cl::opt NoPGOWarnMismatch; extern cl::opt NoPGOWarnMismatchComdatWeak; -using AllocMatchInfo = ::llvm::MemProfUsePass::AllocMatchInfo; } // namespace llvm constexpr int LLVM_MEM_PROFILER_VERSION = 1; @@ -148,11 +148,10 @@ static cl::opt ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"), // By default disable matching of allocation profiles onto operator new that // already explicitly pass a hot/cold hint, since we don't currently -// override these hints anyway. Not static so that it can be set in the unit -// test too. -cl::opt ClMemProfMatchHotColdNew( +// override these hints anyway. 
+static cl::opt ClMemProfMatchHotColdNew( "memprof-match-hot-cold-new", - cl::desc( + cl::desc( "Match allocation profiles onto existing hot/cold operator new calls"), cl::Hidden, cl::init(false)); @@ -790,11 +789,17 @@ static bool isAllocationWithHotColdVariant(Function *Callee, } } -void MemProfUsePass::readMemprof( - Function &F, const IndexedMemProfReader &MemProfReader, - const TargetLibraryInfo &TLI, - std::map &FullStackIdToAllocMatchInfo) { - auto &Ctx = F.getContext(); +struct AllocMatchInfo { + uint64_t TotalSize = 0; + AllocationType AllocType = AllocationType::None; + bool Matched = false; +}; + +static void +readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, + const TargetLibraryInfo &TLI, + std::map &FullStackIdToAllocMatchInfo) { + auto &Ctx = M.getContext(); // Previously we used getIRPGOFuncName() here. If F is local linkage, // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But // llvm-profdata uses FuncName in dwarf to create GUID which doesn't @@ -805,7 +810,7 @@ void MemProfUsePass::readMemprof( auto FuncName = F.getName(); auto FuncGUID = Function::getGUID(FuncName); std::optional MemProfRec; - auto Err = MemProfReader.getMemProfRecord(FuncGUID).moveInto(MemProfRec); + auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec); if (Err) { handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { auto Err = IPE.get(); @@ -833,8 +838,8 @@ void MemProfUsePass::readMemprof( Twine(" Hash = ") + std::to_string(FuncGUID)) .str(); - Ctx.diagnose(DiagnosticInfoPGOProfile(F.getParent()->getName().data(), - Msg, DS_Warning)); + Ctx.diagnose( + DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); }); return; } @@ -1031,15 +1036,15 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::all(); } - std::unique_ptr IndexedReader = + std::unique_ptr MemProfReader = std::move(ReaderOrErr.get()); - if (!IndexedReader) { + if (!MemProfReader) { Ctx.diagnose(DiagnosticInfoPGOProfile( - MemoryProfileFileName.data(), StringRef("Cannot get IndexedReader"))); + MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader"))); return PreservedAnalyses::all(); } - if (!IndexedReader->hasMemoryProfile()) { + if (!MemProfReader->hasMemoryProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), "Not a memory profile")); return PreservedAnalyses::all(); @@ -1052,13 +1057,12 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { // it to an allocation in the IR. 
std::map FullStackIdToAllocMatchInfo; - const auto &MemProfReader = IndexedReader->getIndexedMemProfReader(); for (auto &F : M) { if (F.isDeclaration()) continue; const TargetLibraryInfo &TLI = FAM.getResult(F); - readMemprof(F, MemProfReader, TLI, FullStackIdToAllocMatchInfo); + readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo); } if (ClPrintMemProfMatchInfo) { diff --git a/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt b/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt index 1afe1c339e4335..1f249b0049d062 100644 --- a/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/unittests/Transforms/Instrumentation/CMakeLists.txt @@ -9,7 +9,6 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(InstrumentationTests PGOInstrumentationTest.cpp - MemProfilerTest.cpp ) target_link_libraries(InstrumentationTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfilerTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfilerTest.cpp deleted file mode 100644 index 844867d676e8dd..00000000000000 --- a/llvm/unittests/Transforms/Instrumentation/MemProfilerTest.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===- MemProfilerTest.cpp - MemProfiler unit tests ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Instrumentation/MemProfiler.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/AsmParser/Parser.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Passes/PassBuilder.h" -#include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/ProfileData/MemProf.h" -#include "llvm/ProfileData/MemProfData.inc" -#include "llvm/Support/Error.h" -#include "llvm/Support/SourceMgr.h" - -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -extern llvm::cl::opt ClMemProfMatchHotColdNew; - -namespace llvm { -namespace memprof { -namespace { - -using ::testing::Return; -using ::testing::SizeIs; - -struct MemProfilerTest : public ::testing::Test { - LLVMContext Context; - std::unique_ptr M; - - MemProfilerTest() { ClMemProfMatchHotColdNew = true; } - - void parseAssembly(const StringRef IR) { - SMDiagnostic Error; - M = parseAssemblyString(IR, Error, Context); - std::string ErrMsg; - raw_string_ostream OS(ErrMsg); - Error.print("", OS); - - // A failure here means that the test itself is buggy. - if (!M) - report_fatal_error(OS.str().c_str()); - } -}; - -// A mock memprof reader we can inject into the function we are testing. -class MockMemProfReader : public IndexedMemProfReader { -public: - MOCK_METHOD(Expected, getMemProfRecord, - (const uint64_t FuncNameHash), (const, override)); - - // A helper function to create mock records from frames. - static MemProfRecord makeRecord(ArrayRef> AllocFrames) { - MemProfRecord Record; - MemInfoBlock Info; - // Mimic values which will be below the cold threshold. 
- Info.AllocCount = 1, Info.TotalSize = 550; - Info.TotalLifetime = 1000 * 1000, Info.TotalLifetimeAccessDensity = 1; - for (const auto &Callstack : AllocFrames) { - AllocationInfo AI; - AI.Info = PortableMemInfoBlock(Info, getHotColdSchema()); - AI.CallStack = std::vector(Callstack.begin(), Callstack.end()); - Record.AllocSites.push_back(AI); - } - return Record; - } -}; - -TEST_F(MemProfilerTest, AnnotatesCall) { - parseAssembly(R"IR( - target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" - target triple = "x86_64-unknown-linux-gnu" - - define void @_Z3foov() !dbg !10 { - entry: - %c1 = call {ptr, i64} @__size_returning_new(i64 32), !dbg !13 - %c2 = call {ptr, i64} @__size_returning_new_aligned(i64 32, i64 8), !dbg !14 - %c3 = call {ptr, i64} @__size_returning_new_hot_cold(i64 32, i8 254), !dbg !15 - %c4 = call {ptr, i64} @__size_returning_new_aligned_hot_cold(i64 32, i64 8, i8 254), !dbg !16 - ret void - } - - declare {ptr, i64} @__size_returning_new(i64) - declare {ptr, i64} @__size_returning_new_aligned(i64, i64) - declare {ptr, i64} @__size_returning_new_hot_cold(i64, i8) - declare {ptr, i64} @__size_returning_new_aligned_hot_cold(i64, i64, i8) - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!2, !3} - - !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) - !1 = !DIFile(filename: "mock_file.cc", directory: "mock_dir") - !2 = !{i32 7, !"Dwarf Version", i32 5} - !3 = !{i32 2, !"Debug Info Version", i32 3} - !10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, unit: !0, retainedNodes: !12) - !11 = !DISubroutineType(types: !12) - !12 = !{} - !13 = !DILocation(line: 5, column: 10, scope: !10) - !14 = !DILocation(line: 6, column: 10, scope: !10) - !15 = !DILocation(line: 7, column: 10, scope: !10) - !16 = !DILocation(line: 8, column: 10, scope: !10) - )IR"); - - auto *F = M->getFunction("_Z3foov"); - ASSERT_NE(F, nullptr); - - TargetLibraryInfoWrapperPass WrapperPass; - auto &TLI = WrapperPass.getTLI(*F); - - auto Guid = Function::getGUID("_Z3foov"); - // All the allocation sites are in foo(). - MemProfRecord MockRecord = - MockMemProfReader::makeRecord({{Frame(Guid, 1, 10, false)}, - {Frame(Guid, 2, 10, false)}, - {Frame(Guid, 3, 10, false)}, - {Frame(Guid, 4, 10, false)}}); - // Set up mocks for the reader. - MockMemProfReader Reader; - EXPECT_CALL(Reader, getMemProfRecord(Guid)).WillOnce(Return(MockRecord)); - - MemProfUsePass Pass("/unused/profile/path"); - std::map Unused; - Pass.readMemprof(*F, Reader, TLI, Unused); - - // Since we only have a single type of behaviour for each allocation site, we - // only get function attributes. - std::vector CallsiteAttrs; - for (const auto &BB : *F) { - for (const auto &I : BB) { - if (auto *CI = dyn_cast(&I)) { - if (!CI->getCalledFunction()->getName().starts_with( - "__size_returning_new")) - continue; - Attribute Attr = CI->getFnAttr("memprof"); - // The attribute will be invalid if it didn't find one named memprof. - ASSERT_TRUE(Attr.isValid()); - CallsiteAttrs.push_back(Attr); - } - } - } - - // We match all the variants including ones with the hint since we set - // ClMemProfMatchHotColdNew to true. 
- EXPECT_THAT(CallsiteAttrs, SizeIs(4)); -} - -} // namespace -} // namespace memprof -} // namespace llvm From d88876e74f7882643546becc544a771a5e5e9787 Mon Sep 17 00:00:00 2001 From: vporpo Date: Mon, 26 Aug 2024 11:33:56 -0700 Subject: [PATCH 64/65] [SandboxIR] Implement FenceInst (#105920) This patch implements sandboxir::FenceInst mirroring llvm::FenceInst. --- llvm/include/llvm/SandboxIR/SandboxIR.h | 32 +++++++++++++ .../llvm/SandboxIR/SandboxIRValues.def | 1 + llvm/lib/SandboxIR/SandboxIR.cpp | 37 +++++++++++++++ llvm/unittests/SandboxIR/SandboxIRTest.cpp | 46 +++++++++++++++++++ llvm/unittests/SandboxIR/TrackerTest.cpp | 34 ++++++++++++++ 5 files changed, 150 insertions(+) diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index b8a28669cdd074..32e23ddfcafeed 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -111,6 +111,7 @@ class ConstantInt; class Context; class Function; class Instruction; +class FenceInst; class SelectInst; class ExtractElementInst; class InsertElementInst; @@ -249,6 +250,7 @@ class Value { friend class Context; // For getting `Val`. friend class User; // For getting `Val`. friend class Use; // For getting `Val`. + friend class FenceInst; // For getting `Val`. friend class SelectInst; // For getting `Val`. friend class ExtractElementInst; // For getting `Val`. friend class InsertElementInst; // For getting `Val`. @@ -678,6 +680,7 @@ class Instruction : public sandboxir::User { /// A SandboxIR Instruction may map to multiple LLVM IR Instruction. This /// returns its topmost LLVM IR instruction. llvm::Instruction *getTopmostLLVMInstruction() const; + friend class FenceInst; // For getTopmostLLVMInstruction(). friend class SelectInst; // For getTopmostLLVMInstruction(). friend class ExtractElementInst; // For getTopmostLLVMInstruction(). friend class InsertElementInst; // For getTopmostLLVMInstruction(). @@ -882,6 +885,33 @@ template class SingleLLVMInstructionImpl : public Instruction { #endif }; +class FenceInst : public SingleLLVMInstructionImpl { + FenceInst(llvm::FenceInst *FI, Context &Ctx) + : SingleLLVMInstructionImpl(ClassID::Fence, Opcode::Fence, FI, Ctx) {} + friend Context; // For constructor; + +public: + static FenceInst *create(AtomicOrdering Ordering, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + SyncScope::ID SSID = SyncScope::System); + /// Returns the ordering constraint of this fence instruction. + AtomicOrdering getOrdering() const { + return cast(Val)->getOrdering(); + } + /// Sets the ordering constraint of this fence instruction. May only be + /// Acquire, Release, AcquireRelease, or SequentiallyConsistent. + void setOrdering(AtomicOrdering Ordering); + /// Returns the synchronization scope ID of this fence instruction. + SyncScope::ID getSyncScopeID() const { + return cast(Val)->getSyncScopeID(); + } + /// Sets the synchronization scope ID of this fence instruction. + void setSyncScopeID(SyncScope::ID SSID); + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::Fence; + } +}; + class SelectInst : public SingleLLVMInstructionImpl { /// Use Context::createSelectInst(). Don't call the /// constructor directly. 
@@ -2854,6 +2884,8 @@ class Context { IRBuilder LLVMIRBuilder; auto &getLLVMIRBuilder() { return LLVMIRBuilder; } + FenceInst *createFenceInst(llvm::FenceInst *SI); + friend FenceInst; // For createFenceInst() SelectInst *createSelectInst(llvm::SelectInst *SI); friend SelectInst; // For createSelectInst() InsertElementInst *createInsertElementInst(llvm::InsertElementInst *IEI); diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def index 14cb2d72ad3af6..7bac00fb2918a4 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def +++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def @@ -37,6 +37,7 @@ DEF_USER(ConstantInt, ConstantInt) DEF_INSTR(Opaque, OP(Opaque), OpaqueInst) DEF_INSTR(ExtractElement, OP(ExtractElement), ExtractElementInst) DEF_INSTR(InsertElement, OP(InsertElement), InsertElementInst) +DEF_INSTR(Fence, OP(Fence), FenceInst) DEF_INSTR(ShuffleVector, OP(ShuffleVector), ShuffleVectorInst) DEF_INSTR(Select, OP(Select), SelectInst) DEF_INSTR(Br, OP(Br), BranchInst) diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index f92e9d38125139..559fb4d10fff52 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -575,6 +575,33 @@ void Instruction::dumpOS(raw_ostream &OS) const { } #endif // NDEBUG +FenceInst *FenceInst::create(AtomicOrdering Ordering, BBIterator WhereIt, + BasicBlock *WhereBB, Context &Ctx, + SyncScope::ID SSID) { + auto &Builder = Ctx.getLLVMIRBuilder(); + if (WhereIt != WhereBB->end()) + Builder.SetInsertPoint((*WhereIt).getTopmostLLVMInstruction()); + else + Builder.SetInsertPoint(cast(WhereBB->Val)); + llvm::FenceInst *LLVMI = Builder.CreateFence(Ordering, SSID); + return Ctx.createFenceInst(LLVMI); +} + +void FenceInst::setOrdering(AtomicOrdering Ordering) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&FenceInst::getOrdering, &FenceInst::setOrdering>>( + this); + cast(Val)->setOrdering(Ordering); +} + +void FenceInst::setSyncScopeID(SyncScope::ID SSID) { + Ctx.getTracker() + .emplaceIfTracking>(this); + cast(Val)->setSyncScopeID(SSID); +} + Value *SelectInst::createCommon(Value *Cond, Value *True, Value *False, const Twine &Name, IRBuilder<> &Builder, Context &Ctx) { @@ -2157,6 +2184,11 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { assert(isa(LLVMV) && "Expected Instruction"); switch (cast(LLVMV)->getOpcode()) { + case llvm::Instruction::Fence: { + auto *LLVMFence = cast(LLVMV); + It->second = std::unique_ptr(new FenceInst(LLVMFence, *this)); + return It->second.get(); + } case llvm::Instruction::Select: { auto *LLVMSel = cast(LLVMV); It->second = std::unique_ptr(new SelectInst(LLVMSel, *this)); @@ -2349,6 +2381,11 @@ BasicBlock *Context::createBasicBlock(llvm::BasicBlock *LLVMBB) { return BB; } +FenceInst *Context::createFenceInst(llvm::FenceInst *SI) { + auto NewPtr = std::unique_ptr(new FenceInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + SelectInst *Context::createSelectInst(llvm::SelectInst *SI) { auto NewPtr = std::unique_ptr(new SelectInst(SI, *this)); return cast(registerValue(std::move(NewPtr))); diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 83edd954080e9f..a7192ac98af41a 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -580,6 +580,52 @@ define void @foo(i8 %v1) { EXPECT_EQ(I0->getNextNode(), Ret); } +TEST_F(SandboxIRTest, FenceInst) { + parseIR(C, 
R"IR( +define void @foo() { + fence syncscope("singlethread") seq_cst + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + llvm::BasicBlock *LLVMBB = &*LLVMF->begin(); + auto *LLVMFence = cast(&*LLVMBB->begin()); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Fence = cast(&*It++); + auto *Ret = cast(&*It++); + + // Check getOrdering(). + EXPECT_EQ(Fence->getOrdering(), LLVMFence->getOrdering()); + // Check setOrdering(). + auto OrigOrdering = Fence->getOrdering(); + auto NewOrdering = AtomicOrdering::Release; + EXPECT_NE(NewOrdering, OrigOrdering); + Fence->setOrdering(NewOrdering); + EXPECT_EQ(Fence->getOrdering(), NewOrdering); + Fence->setOrdering(OrigOrdering); + EXPECT_EQ(Fence->getOrdering(), OrigOrdering); + // Check getSyncScopeID(). + EXPECT_EQ(Fence->getSyncScopeID(), LLVMFence->getSyncScopeID()); + // Check setSyncScopeID(). + auto OrigSSID = Fence->getSyncScopeID(); + auto NewSSID = SyncScope::System; + EXPECT_NE(NewSSID, OrigSSID); + Fence->setSyncScopeID(NewSSID); + EXPECT_EQ(Fence->getSyncScopeID(), NewSSID); + Fence->setSyncScopeID(OrigSSID); + EXPECT_EQ(Fence->getSyncScopeID(), OrigSSID); + // Check create(). + auto *NewFence = + sandboxir::FenceInst::create(AtomicOrdering::Release, Ret->getIterator(), + BB, Ctx, SyncScope::SingleThread); + EXPECT_EQ(NewFence->getNextNode(), Ret); + EXPECT_EQ(NewFence->getOrdering(), AtomicOrdering::Release); + EXPECT_EQ(NewFence->getSyncScopeID(), SyncScope::SingleThread); +} + TEST_F(SandboxIRTest, SelectInst) { parseIR(C, R"IR( define void @foo(i1 %c0, i8 %v0, i8 %v1, i1 %c1) { diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp index f0d6a0d57b8c3e..5f04cbd5840ba5 100644 --- a/llvm/unittests/SandboxIR/TrackerTest.cpp +++ b/llvm/unittests/SandboxIR/TrackerTest.cpp @@ -542,6 +542,40 @@ define void @foo(ptr %ptr) { EXPECT_EQ(It, BB->end()); } +TEST_F(TrackerTest, FenceInstSetters) { + parseIR(C, R"IR( +define void @foo() { + fence syncscope("singlethread") seq_cst + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *Fence = cast(&*It++); + + // Check setOrdering(). + auto OrigOrdering = Fence->getOrdering(); + auto NewOrdering = AtomicOrdering::Release; + EXPECT_NE(NewOrdering, OrigOrdering); + Ctx.save(); + Fence->setOrdering(NewOrdering); + EXPECT_EQ(Fence->getOrdering(), NewOrdering); + Ctx.revert(); + EXPECT_EQ(Fence->getOrdering(), OrigOrdering); + // Check setSyncScopeID(). + auto OrigSSID = Fence->getSyncScopeID(); + auto NewSSID = SyncScope::System; + EXPECT_NE(NewSSID, OrigSSID); + Ctx.save(); + Fence->setSyncScopeID(NewSSID); + EXPECT_EQ(Fence->getSyncScopeID(), NewSSID); + Ctx.revert(); + EXPECT_EQ(Fence->getSyncScopeID(), OrigSSID); +} + TEST_F(TrackerTest, CallBaseSetters) { parseIR(C, R"IR( declare void @bar1(i8) From 4b0c0ec6b8065e611640f44adce94e2da12b3a3a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 26 Aug 2024 11:40:25 -0700 Subject: [PATCH 65/65] [CodeGen] Use MCRegister for CCState::AllocateReg and CCValAssign::getReg. 
NFC (#106032) --- llvm/include/llvm/CodeGen/CallingConvLower.h | 12 +++++----- llvm/lib/Target/ARM/ARMCallingConv.cpp | 16 +++++++------- llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +- .../LoongArch/LoongArchISelLowering.cpp | 6 ++--- llvm/lib/Target/MSP430/MSP430ISelLowering.cpp | 4 ++-- llvm/lib/Target/Mips/MipsISelLowering.cpp | 2 +- llvm/lib/Target/PowerPC/PPCCallingConv.cpp | 6 ++--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 22 +++++++++---------- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 22 +++++++++---------- llvm/lib/Target/X86/X86CallingConv.cpp | 10 ++++----- llvm/utils/TableGen/CallingConvEmitter.cpp | 8 +++---- 11 files changed, 55 insertions(+), 55 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 932a2a94ab1f1a..d5a63c8dd627a0 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -81,16 +81,16 @@ class CCValAssign { } public: - static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, + static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom = false) { CCValAssign Ret(HTP, ValNo, ValVT, LocVT, IsCustom); - Ret.Data = Register(RegNo); + Ret.Data = Register(Reg); return Ret; } - static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, + static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP) { - return getReg(ValNo, ValVT, RegNo, LocVT, HTP, /*IsCustom=*/true); + return getReg(ValNo, ValVT, Reg, LocVT, HTP, /*IsCustom=*/true); } static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, @@ -112,7 +112,7 @@ class CCValAssign { return Ret; } - void convertToReg(unsigned RegNo) { Data = Register(RegNo); } + void convertToReg(MCRegister Reg) { Data = Register(Reg); } void convertToMem(int64_t Offset) { Data = Offset; } @@ -346,7 +346,7 @@ class CCState { /// AllocateReg - Attempt to allocate one of the specified registers. If none /// are available, return zero. Otherwise, return the first one available, /// marking it and any aliases as allocated. - MCPhysReg AllocateReg(ArrayRef Regs) { + MCRegister AllocateReg(ArrayRef Regs) { unsigned FirstUnalloc = getFirstUnallocated(Regs); if (FirstUnalloc == Regs.size()) return MCRegister(); // Didn't find the reg. diff --git a/llvm/lib/Target/ARM/ARMCallingConv.cpp b/llvm/lib/Target/ARM/ARMCallingConv.cpp index 4878c73138940d..2ab66da4b4d2d9 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.cpp +++ b/llvm/lib/Target/ARM/ARMCallingConv.cpp @@ -24,7 +24,7 @@ static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. - if (unsigned Reg = State.AllocateReg(RegList)) + if (MCRegister Reg = State.AllocateReg(RegList)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else { // For the 2nd half of a v2f64, do not fail. @@ -38,7 +38,7 @@ static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, } // Try to get the second register. 
- if (unsigned Reg = State.AllocateReg(RegList)) + if (MCRegister Reg = State.AllocateReg(RegList)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else State.addLoc(CCValAssign::getCustomMem( @@ -67,8 +67,8 @@ static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 }; static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList); - if (Reg == 0) { + MCRegister Reg = State.AllocateReg(HiRegList, ShadowRegList); + if (!Reg) { // If we had R3 unallocated only, now we still must to waste it. Reg = State.AllocateReg(GPRArgRegs); @@ -89,7 +89,7 @@ static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, if (HiRegList[i] == Reg) break; - unsigned T = State.AllocateReg(LoRegList[i]); + MCRegister T = State.AllocateReg(LoRegList[i]); (void)T; assert(T == LoRegList[i] && "Could not allocate register"); @@ -116,8 +116,8 @@ static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT, static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; - unsigned Reg = State.AllocateReg(HiRegList, LoRegList); - if (Reg == 0) + MCRegister Reg = State.AllocateReg(HiRegList, LoRegList); + if (!Reg) return false; // we didn't handle it unsigned i; @@ -287,7 +287,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT, static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, CCState &State, ArrayRef RegList) { - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); if (Reg) { State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return true; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 4ab0433069ae66..853f54943eebf1 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2915,7 +2915,7 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, // Byval (as with any stack) slots are always at least 4 byte aligned. 
Alignment = std::max(Alignment, Align(4)); - unsigned Reg = State->AllocateReg(GPRArgRegs); + MCRegister Reg = State->AllocateReg(GPRArgRegs); if (!Reg) return; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 50c6c263e966b5..95c1b150722f64 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -5012,7 +5012,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31}; - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (MCRegister Reg = State.AllocateReg(GPRList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -5023,7 +5023,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, // fs0,fs1,fs2,fs3 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, LoongArch::F26, LoongArch::F27}; - if (unsigned Reg = State.AllocateReg(FPR32List)) { + if (MCRegister Reg = State.AllocateReg(FPR32List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -5034,7 +5034,7 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, // fs4,fs5,fs6,fs7 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, LoongArch::F30_64, LoongArch::F31_64}; - if (unsigned Reg = State.AllocateReg(FPR64List)) { + if (MCRegister Reg = State.AllocateReg(FPR64List)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index ba7b6c85bd81a9..1c7a14464d7bb0 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -527,7 +527,7 @@ static void AnalyzeArguments(CCState &State, if (!UsedStack && Parts == 2 && RegsLeft == 1) { // Special case for 32-bit register split, see EABI section 3.3.3 - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo)); RegsLeft -= 1; @@ -535,7 +535,7 @@ static void AnalyzeArguments(CCState &State, CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State); } else if (Parts <= RegsLeft) { for (unsigned j = 0; j < Parts; j++) { - unsigned Reg = State.AllocateReg(RegList); + MCRegister Reg = State.AllocateReg(RegList); State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo)); RegsLeft--; } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 0f2047fcac640e..31b86b32008903 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -2991,7 +2991,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, } else { Reg = State.AllocateReg(F64Regs); // Shadow int registers - unsigned Reg2 = State.AllocateReg(IntRegs); + MCRegister Reg2 = State.AllocateReg(IntRegs); if (Reg2 == Mips::A1 || Reg2 == Mips::A3) State.AllocateReg(IntRegs); State.AllocateReg(IntRegs); diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp index 188fc96bc7c2a3..d5077ab2796519 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp @@ -151,7 +151,7 @@ static bool 
CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT, static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 }; // Try to get the first register. - unsigned Reg = State.AllocateReg(HiRegList); + MCRegister Reg = State.AllocateReg(HiRegList); if (!Reg) return false; @@ -160,7 +160,7 @@ static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT, if (HiRegList[i] == Reg) break; - unsigned T = State.AllocateReg(LoRegList[i]); + MCRegister T = State.AllocateReg(LoRegList[i]); (void)T; assert(T == LoRegList[i] && "Could not allocate register"); @@ -180,7 +180,7 @@ static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT, static const MCPhysReg LoRegList[] = { PPC::R4 }; // Try to get the first register. - unsigned Reg = State.AllocateReg(HiRegList, LoRegList); + MCRegister Reg = State.AllocateReg(HiRegList, LoRegList); if (!Reg) return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 459a96eca1ff20..efabfa0b511a6e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6904,7 +6904,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, while (NextReg != GPRs.size() && !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) { // Shadow allocate next registers since its aligment is not strict enough. - unsigned Reg = State.AllocateReg(GPRs); + MCRegister Reg = State.AllocateReg(GPRs); // Allocate the stack space shadowed by said register. State.AllocateStack(PtrSize, PtrAlign); assert(Reg && "Alocating register unexpectedly failed."); @@ -6915,7 +6915,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, const unsigned StackSize = alignTo(ByValSize, ObjAlign); unsigned Offset = State.AllocateStack(StackSize, ObjAlign); for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) { - if (unsigned Reg = State.AllocateReg(GPRs)) + if (MCRegister Reg = State.AllocateReg(GPRs)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); else { State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE, @@ -6942,7 +6942,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits()) LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt : CCValAssign::LocInfo::ZExt; - if (unsigned Reg = State.AllocateReg(GPRs)) + if (MCRegister Reg = State.AllocateReg(GPRs)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); else State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo)); @@ -6957,13 +6957,13 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, // This includes f64 in 64-bit mode for ABI compatibility. const unsigned Offset = State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4)); - unsigned FReg = State.AllocateReg(FPR); + MCRegister FReg = State.AllocateReg(FPR); if (FReg) State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo)); // Reserve and initialize GPRs or initialize the PSA as required. for (unsigned I = 0; I < StoreSize; I += PtrSize) { - if (unsigned Reg = State.AllocateReg(GPRs)) { + if (MCRegister Reg = State.AllocateReg(GPRs)) { assert(FReg && "An FPR should be available when a GPR is reserved."); if (State.isVarArg()) { // Successfully reserved GPRs are only initialized for vararg calls. @@ -7003,7 +7003,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (!State.isVarArg()) { // If there are vector registers remaining we don't consume any stack // space. 
- if (unsigned VReg = State.AllocateReg(VR)) { + if (MCRegister VReg = State.AllocateReg(VR)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); return false; } @@ -7021,7 +7021,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, while (NextRegIndex != GPRs.size() && !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) { // Shadow allocate register and its stack shadow. - unsigned Reg = State.AllocateReg(GPRs); + MCRegister Reg = State.AllocateReg(GPRs); State.AllocateStack(PtrSize, PtrAlign); assert(Reg && "Allocating register unexpectedly failed."); (void)Reg; @@ -7033,7 +7033,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, // through ellipses) and shadow GPRs (unlike arguments to non-vaarg // functions) if (State.isFixed(ValNo)) { - if (unsigned VReg = State.AllocateReg(VR)) { + if (MCRegister VReg = State.AllocateReg(VR)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); // Shadow allocate GPRs and stack space even though we pass in a VR. for (unsigned I = 0; I != VecSize; I += PtrSize) @@ -7062,8 +7062,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, State.addLoc( CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - const unsigned FirstReg = State.AllocateReg(PPC::R9); - const unsigned SecondReg = State.AllocateReg(PPC::R10); + const MCRegister FirstReg = State.AllocateReg(PPC::R9); + const MCRegister SecondReg = State.AllocateReg(PPC::R10); assert(FirstReg && SecondReg && "Allocating R9 or R10 unexpectedly failed."); State.addLoc( @@ -7080,7 +7080,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, State.addLoc( CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); for (unsigned I = 0; I != VecSize; I += PtrSize) { - const unsigned Reg = State.AllocateReg(GPRs); + const MCRegister Reg = State.AllocateReg(GPRs); assert(Reg && "Failed to allocated register for vararg vector argument"); State.addLoc( CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo)); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 670dee2edb1dfb..4e86bee6a55b54 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -18625,7 +18625,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // Static chain parameter must not be passed in normal argument registers, // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain if (ArgFlags.isNest()) { - if (unsigned Reg = State.AllocateReg(RISCV::X7)) { + if (MCRegister Reg = State.AllocateReg(RISCV::X7)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19098,7 +19098,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher) { if (LocVT == MVT::i32 || LocVT == MVT::i64) { - if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { + if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -19113,7 +19113,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; - if (unsigned Reg = State.AllocateReg(FPR16List)) { + if (MCRegister Reg = 
State.AllocateReg(FPR16List)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
@@ -19125,7 +19125,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
-    if (unsigned Reg = State.AllocateReg(FPR32List)) {
+    if (MCRegister Reg = State.AllocateReg(FPR32List)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
@@ -19137,7 +19137,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
-    if (unsigned Reg = State.AllocateReg(FPR64List)) {
+    if (MCRegister Reg = State.AllocateReg(FPR64List)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
@@ -19149,7 +19149,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
       (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
       (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())) {
-    if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
+    if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
@@ -19184,7 +19184,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
           CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
     } else {
       // Try and pass the address via a "fast" GPR.
-      if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
+      if (MCRegister GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
         LocInfo = CCValAssign::Indirect;
         LocVT = TLI.getSubtarget().getXLenVT();
         State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
@@ -19222,7 +19222,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
-    if (unsigned Reg = State.AllocateReg(GPRList)) {
+    if (MCRegister Reg = State.AllocateReg(GPRList)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
@@ -19237,7 +19237,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
                                           RISCV::F18_F, RISCV::F19_F,
                                           RISCV::F20_F, RISCV::F21_F};
-    if (unsigned Reg = State.AllocateReg(FPR32List)) {
+    if (MCRegister Reg = State.AllocateReg(FPR32List)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
@@ -19249,7 +19249,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
                                           RISCV::F24_D, RISCV::F25_D,
                                           RISCV::F26_D, RISCV::F27_D};
-    if (unsigned Reg = State.AllocateReg(FPR64List)) {
+    if (MCRegister Reg = State.AllocateReg(FPR64List)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
@@ -19258,7 +19258,7 @@ bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
   if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
       (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())) {
-    if (unsigned Reg = State.AllocateReg(GPRList)) {
+    if (MCRegister Reg = State.AllocateReg(GPRList)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 0ea51bec29b816..154cb1399880bc 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -51,7 +51,7 @@ static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
   for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
     // Marking the register as located.
-    unsigned Reg = State.AllocateReg(AvailableRegs[I]);
+    MCRegister Reg = State.AllocateReg(AvailableRegs[I]);
     // Since we previously made sure that 2 registers are available
     // we expect that a real register number will be returned.
@@ -102,7 +102,7 @@ static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
   for (auto Reg : RegList) {
     // If the register is not marked as allocated - assign to it.
     if (!State.isAllocated(Reg)) {
-      unsigned AssigedReg = State.AllocateReg(Reg);
+      MCRegister AssigedReg = State.AllocateReg(Reg);
       assert(AssigedReg == Reg && "Expecting a valid register allocation");
       State.addLoc(
           CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo));
@@ -158,7 +158,7 @@ static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
       (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());
     // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
-    if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
+    if (MCRegister Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
       // In Vectorcall Calling convention, additional shadow stack can be
       // created on top of the basic 32 bytes of win64.
       // It can happen if the fifth or sixth argument is vector type or HVA.
@@ -209,7 +209,7 @@ static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
       return true; // If this is an HVA - Stop the search.
     // Assign XMM register.
-    if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
+    if (MCRegister Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return true;
     }
@@ -259,7 +259,7 @@ static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   // If there are no pending members, we are not in the middle of a split,
   // so do the usual inreg stuff.
   if (PendingMembers.empty()) {
-    if (unsigned Reg = State.AllocateReg(RegList)) {
+    if (MCRegister Reg = State.AllocateReg(RegList)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return true;
     }
diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp
index ec6ef56a66fa0f..6a3030bfc1b7e3 100644
--- a/llvm/utils/TableGen/CallingConvEmitter.cpp
+++ b/llvm/utils/TableGen/CallingConvEmitter.cpp
@@ -160,7 +160,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
     ListInit *RegList = Action->getValueAsListInit("RegList");
     if (RegList->size() == 1) {
       std::string Name = getQualifiedName(RegList->getElementAsRecord(0));
-      O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name
+      O << IndentStr << "if (MCRegister Reg = State.AllocateReg(" << Name
         << ")) {\n";
       if (SwiftAction)
         AssignedSwiftRegsMap[CurrentAction].insert(Name);
@@ -180,7 +180,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
         O << LS << Name;
       }
       O << "\n" << IndentStr << "};\n";
-      O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList"
+      O << IndentStr << "if (MCRegister Reg = State.AllocateReg(RegList"
         << Counter << ")) {\n";
     }
     O << IndentStr << "  State.addLoc(CCValAssign::getReg(ValNo, ValVT, "
@@ -217,7 +217,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
            "Invalid length of list of shadowed registers");
     if (RegList->size() == 1) {
-      O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
+      O << IndentStr << "if (MCRegister Reg = State.AllocateReg(";
       O << getQualifiedName(RegList->getElementAsRecord(0));
       O << ", " << getQualifiedName(ShadowRegList->getElementAsRecord(0));
       O << ")) {\n";
@@ -241,7 +241,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
         O << LSS << getQualifiedName(ShadowRegList->getElementAsRecord(i));
       O << "\n" << IndentStr << "};\n";
-      O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList"
+      O << IndentStr << "if (MCRegister Reg = State.AllocateReg(RegList"
        << RegListNumber << ", "
        << "RegList" << ShadowRegListNumber << ")) {\n";
    }
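Every hunk above makes the same mechanical change: the value returned by `CCState::AllocateReg` is now declared as `MCRegister` instead of `unsigned`, both in hand-written calling-convention code and in the source text emitted by TableGen. The `if (MCRegister Reg = State.AllocateReg(...))` idiom keeps working because an invalid register handle converts to zero, exactly as the old `unsigned` sentinel did. The sketch below is a minimal, self-contained model of that pattern; the `Register` class and `allocateReg` helper are simplified stand-ins invented for illustration, not the real `llvm::MCRegister` or `CCState` API.

```cpp
// Minimal model of an AllocateReg-style helper whose "no register" result is
// zero, so the handle can be declared and tested directly in an if-condition.
#include <cstddef>
#include <iostream>
#include <vector>

class Register {
  unsigned Reg = 0; // 0 plays the role of "no register" in this sketch.

public:
  constexpr Register() = default;
  constexpr explicit Register(unsigned R) : Reg(R) {}
  constexpr operator unsigned() const { return Reg; } // raw id when needed
  constexpr bool isValid() const { return Reg != 0; } // named validity check
};

// Hands out the first register in List that is still free; returns an
// invalid (zero) Register once the list is exhausted.
Register allocateReg(const std::vector<unsigned> &List,
                     std::vector<bool> &Allocated) {
  for (std::size_t I = 0; I < List.size(); ++I) {
    if (!Allocated[I]) {
      Allocated[I] = true;
      return Register(List[I]);
    }
  }
  return Register();
}

int main() {
  std::vector<unsigned> FPR32List = {10, 11, 12}; // stand-ins for F10_F..F12_F
  std::vector<bool> Allocated(FPR32List.size(), false);

  for (int Arg = 0; Arg < 4; ++Arg) {
    // Same shape as the changed lines: declare the handle in the condition
    // and fall back to the stack when no register is left.
    if (Register Reg = allocateReg(FPR32List, Allocated))
      std::cout << "arg " << Arg << " -> reg " << unsigned(Reg) << '\n';
    else
      std::cout << "arg " << Arg << " -> stack\n";
  }
}
```

The `CallingConvEmitter.cpp` hunks follow the same idea one level up: they only rewrite the string literals that TableGen prints, so the autogenerated `CC_*` functions declare the allocated register with the same type as the hand-written ones.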