Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LoongArch] Merge base and offset for large offsets #113277

Merged
merged 1 commit into from
Oct 23, 2024

Conversation

heiher
Copy link
Member

@heiher heiher commented Oct 22, 2024

This PR merges large offsets into the base address loading.

@llvmbot
Copy link
Member

llvmbot commented Oct 22, 2024

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

This PR merges large offsets into the base address loading.


Full diff: https://github.com/llvm/llvm-project/pull/113277.diff

2 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp (+99-68)
  • (modified) llvm/test/CodeGen/LoongArch/merge-base-offset.ll (+20-37)
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index 47071d29c2cd44..e9455fdd23ba54 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
     int64_t Offset) {
-  assert(isInt<32>(Offset) && "Unexpected offset");
   // Put the offset back in Hi and the Lo
   Hi20.getOperand(1).setOffset(Offset);
   Lo12.getOperand(2).setOffset(Offset);
@@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
 // instructions and deletes TailAdd and the instructions that produced the
 // offset.
 //
-//                     Base address lowering is of the form:
-//                       Hi20:  pcalau12i vreg1, %pc_hi20(s)
-//                       Lo12:  addi.d vreg2, vreg1, %pc_lo12(s)
-//                       /                                  \
-//                      /                                    \
-//                     /                                      \
-//                    /  The large offset can be of two forms: \
-//  1) Offset that has non zero bits in lower      2) Offset that has non zero
-//     12 bits and upper 20 bits                      bits in upper 20 bits only
-//   OffsetHi: lu12i.w vreg3, 4
-//   OffsetLo: ori voff, vreg3, 188                 OffsetHi: lu12i.w voff, 128
-//                    \                                        /
-//                     \                                      /
-//                      \                                    /
-//                       \                                  /
-//                        TailAdd: add.d  vreg4, vreg2, voff
+//   (The instructions marked with "!" are not necessarily present)
+//
+//        Base address lowering is of the form:
+//           Hi20:  pcalau12i vreg1, %pc_hi20(s)
+//        +- Lo12:  addi.d vreg2, vreg1, %pc_lo12(s)
+//        |  Lo20:  lu32i.d vreg2, %pc64_lo20(s) !
+//        +- Hi12:  lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+//        |
+//        | The large offset can take one of the following forms:
+//        |
+//        +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
+//        |     OffsetHi20: lu12i.w vreg3, 4
+//        |     OffsetLo12: ori voff, vreg3, 188    ------------------+
+//        |                                                           |
+//        +-> 2) Offset that has non zero bits in Hi20 bits only:     |
+//        |     OffsetHi20: lu12i.w voff, 128       ------------------+
+//        |                                                           |
+//        +-> 3) Offset that has non zero bits in Lo20 bits:          |
+//        |     OffsetHi20: lu12i.w vreg3, 121 !                      |
+//        |     OffsetLo12: ori voff, vreg3, 122 !                    |
+//        |     OffsetLo20: lu32i.d voff, 123       ------------------+
+//        +-> 4) Offset that has non zero bits in Hi12 bits:          |
+//              OffsetHi20: lu12i.w vreg3, 121 !                      |
+//              OffsetLo12: ori voff, vreg3, 122 !                    |
+//              OffsetLo20: lu32i.d vreg3, 123 !                      |
+//              OffsetHi12: lu52i.d voff, vreg3, 124 -----------------+
+//                                                                    |
+//        TailAdd: add.d  vreg4, vreg2, voff       <------------------+
+//
 bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
     MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
     MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
@@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
   Register Rs = TailAdd.getOperand(1).getReg();
   Register Rt = TailAdd.getOperand(2).getReg();
   Register Reg = Rs == GAReg ? Rt : Rs;
+  SmallVector<MachineInstr *, 4> Instrs;
+  int64_t Offset = 0;
+  int64_t Mask = -1;
+
+  // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
+  for (int i = 0; i < 4; i++) {
+    // Stop walking the definition chain once Reg is R0.
+    if (Reg == LoongArch::R0)
+      break;
 
-  // Can't fold if the register has more than one use.
-  if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
-    return false;
-  // This can point to an ORI or a LU12I.W:
-  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
-  if (OffsetTail.getOpcode() == LoongArch::ORI) {
-    // The offset value has non zero bits in both %hi and %lo parts.
-    // Detect an ORI that feeds from a LU12I.W instruction.
-    MachineOperand &OriImmOp = OffsetTail.getOperand(2);
-    if (OriImmOp.getTargetFlags() != LoongArchII::MO_None)
+    // Can't fold if the register has more than one use.
+    if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
       return false;
-    Register OriReg = OffsetTail.getOperand(1).getReg();
-    int64_t OffLo = OriImmOp.getImm();
-
-    // Handle rs1 of ORI is R0.
-    if (OriReg == LoongArch::R0) {
-      LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail);
-      foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
-      OffsetTail.eraseFromParent();
-      return true;
-    }
 
-    MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg);
-    MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1);
-    if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W ||
-        Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None ||
-        !MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg()))
+    MachineInstr *Curr = MRI->getVRegDef(Reg);
+    if (!Curr)
+      break;
+
+    switch (Curr->getOpcode()) {
+    default:
+      // Can't fold if the instruction opcode is unexpected.
       return false;
-    int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12);
-    Offset += OffLo;
-    // LU12I.W+ORI sign extends the result.
-    Offset = SignExtend64<32>(Offset);
-    LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
-                      << "                 " << OffsetLu12i);
-    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
-    OffsetTail.eraseFromParent();
-    OffsetLu12i.eraseFromParent();
-    return true;
-  } else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) {
-    // The offset value has all zero bits in the lower 12 bits. Only LU12I.W
-    // exists.
-    LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
-    int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
-    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
-    OffsetTail.eraseFromParent();
-    return true;
+    case LoongArch::ORI: {
+      MachineOperand ImmOp = Curr->getOperand(2);
+      if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
+        return false;
+      Offset += ImmOp.getImm();
+      Reg = Curr->getOperand(1).getReg();
+      Instrs.push_back(Curr);
+      break;
+    }
+    case LoongArch::LU12I_W: {
+      MachineOperand ImmOp = Curr->getOperand(1);
+      if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
+        return false;
+      Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
+      Reg = LoongArch::R0;
+      Instrs.push_back(Curr);
+      break;
+    }
+    case LoongArch::LU32I_D: {
+      MachineOperand ImmOp = Curr->getOperand(2);
+      if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
+        return false;
+      Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
+      Mask ^= 0x000FFFFF00000000ULL;
+      Reg = Curr->getOperand(1).getReg();
+      Instrs.push_back(Curr);
+      break;
+    }
+    case LoongArch::LU52I_D: {
+      MachineOperand ImmOp = Curr->getOperand(2);
+      if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
+        return false;
+      Offset += ImmOp.getImm() << 52;
+      Mask ^= 0xFFF0000000000000ULL;
+      Reg = Curr->getOperand(1).getReg();
+      Instrs.push_back(Curr);
+      break;
+    }
+    }
   }
-  return false;
+
+  // Can't fold if the offset is not extracted.
+  if (!Offset)
+    return false;
+
+  foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
+  LLVM_DEBUG(dbgs() << "  Offset Instrs:\n");
+  for (auto I : Instrs) {
+    LLVM_DEBUG(dbgs() << "                 " << *I);
+    I->eraseFromParent();
+  }
+
+  return true;
 }
 
 bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
@@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
     [[fallthrough]];
   case LoongArch::ADD_D:
     // The offset is too large to fit in the immediate field of ADDI.
-    // This can be in two forms:
-    // 1) LU12I.W hi_offset followed by:
-    //    ORI lo_offset
-    //    This happens in case the offset has non zero bits in
-    //    both hi 20 and lo 12 bits.
-    // 2) LU12I.W (offset20)
-    //    This happens in case the lower 12 bits of the offset are zeros.
     return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
     break;
   }
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
index b53f94303b6ea4..9df5532d51179e 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
@@ -1100,14 +1100,11 @@ define dso_local ptr @load_addr_offset_281474439839744() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_281474439839744:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+2251795518717952)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+2251795518717952)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+2251795518717952)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+2251795518717952)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    ori $a1, $zero, 0
-; LA64-LARGE-NEXT:    lu32i.d $a1, 524287
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 281474439839744)
@@ -1131,14 +1128,11 @@ define dso_local ptr @load_addr_offset_248792680471040() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_248792680471040:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+1990341443768320)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+1990341443768320)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+1990341443768320)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+1990341443768320)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    lu12i.w $a1, 502733
-; LA64-LARGE-NEXT:    lu32i.d $a1, 463412
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 248792680471040)
@@ -1163,15 +1157,11 @@ define dso_local ptr @load_addr_offset_9380351707272() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_9380351707272:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+75042813658176)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+75042813658176)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+75042813658176)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+75042813658176)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    lu12i.w $a1, 279556
-; LA64-LARGE-NEXT:    ori $a1, $a1, 1088
-; LA64-LARGE-NEXT:    lu32i.d $a1, 17472
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 9380351707272)
@@ -1194,13 +1184,11 @@ define dso_local ptr @load_addr_offset_562949953421312() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_562949953421312:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+4503599627370496)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+4503599627370496)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+4503599627370496)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+4503599627370496)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    lu52i.d $a1, $zero, 1
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 562949953421312)
@@ -1226,16 +1214,11 @@ define dso_local ptr @load_addr_offset_614749556925924693() nounwind {
 ;
 ; LA64-LARGE-LABEL: load_addr_offset_614749556925924693:
 ; LA64-LARGE:       # %bb.0: # %entry
-; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT:    pcalau12i $a0, %pc_hi20(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT:    addi.d $a1, $zero, %pc_lo12(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT:    lu32i.d $a1, %pc64_lo20(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, %pc64_hi12(g_a64+4917996455407397544)
 ; LA64-LARGE-NEXT:    add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT:    lu12i.w $a1, 209666
-; LA64-LARGE-NEXT:    ori $a1, $a1, 2728
-; LA64-LARGE-NEXT:    lu32i.d $a1, 15288
-; LA64-LARGE-NEXT:    lu52i.d $a1, $a1, 1092
-; LA64-LARGE-NEXT:    add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)

Copy link
Contributor

@SixWeining SixWeining left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@heiher heiher merged commit b225b15 into llvm:main Oct 23, 2024
10 checks passed
@heiher heiher deleted the merge-base-large-offset branch October 23, 2024 11:43
@frobtech frobtech mentioned this pull request Oct 25, 2024
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
This PR merges large offsets into the base address loading.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants