-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoongArch] Merge base and offset for large offsets #113277
Merged
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-loongarch Author: hev (heiher) Changes: This PR merges large offsets into the base address loading. Full diff: https://github.com/llvm/llvm-project/pull/113277.diff 2 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index 47071d29c2cd44..e9455fdd23ba54 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
int64_t Offset) {
- assert(isInt<32>(Offset) && "Unexpected offset");
// Put the offset back in Hi and the Lo
Hi20.getOperand(1).setOffset(Offset);
Lo12.getOperand(2).setOffset(Offset);
@@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
// instructions and deletes TailAdd and the instructions that produced the
// offset.
//
-// Base address lowering is of the form:
-// Hi20: pcalau12i vreg1, %pc_hi20(s)
-// Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
-// / \
-// / \
-// / \
-// / The large offset can be of two forms: \
-// 1) Offset that has non zero bits in lower 2) Offset that has non zero
-// 12 bits and upper 20 bits bits in upper 20 bits only
-// OffsetHi: lu12i.w vreg3, 4
-// OffsetLo: ori voff, vreg3, 188 OffsetHi: lu12i.w voff, 128
-// \ /
-// \ /
-// \ /
-// \ /
-// TailAdd: add.d vreg4, vreg2, voff
+// (The instructions marked with "!" are not necessarily present)
+//
+// Base address lowering is of the form:
+// Hi20: pcalau12i vreg1, %pc_hi20(s)
+// +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
+// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
+// +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+// |
+// | The large offset can be one of the forms:
+// |
+// +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
+// | OffsetHi20: lu12i.w vreg3, 4
+// | OffsetLo12: ori voff, vreg3, 188 ------------------+
+// | |
+// +-> 2) Offset that has non zero bits in Hi20 bits only: |
+// | OffsetHi20: lu12i.w voff, 128 ------------------+
+// | |
+// +-> 3) Offset that has non zero bits in Lo20 bits: |
+// | OffsetHi20: lu12i.w vreg3, 121 ! |
+// | OffsetLo12: ori voff, vreg3, 122 ! |
+// | OffsetLo20: lu32i.d voff, 123 ------------------+
+// +-> 4) Offset that has non zero bits in Hi12 bits: |
+// OffsetHi20: lu12i.w vreg3, 121 ! |
+// OffsetLo12: ori voff, vreg3, 122 ! |
+// OffsetLo20: lu32i.d vreg3, 123 ! |
+// OffsetHi12: lu52i.d voff, vreg3, 124 -----------------+
+// |
+// TailAdd: add.d vreg4, vreg2, voff <------------------+
+//
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
@@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
Register Rs = TailAdd.getOperand(1).getReg();
Register Rt = TailAdd.getOperand(2).getReg();
Register Reg = Rs == GAReg ? Rt : Rs;
+ SmallVector<MachineInstr *, 4> Instrs;
+ int64_t Offset = 0;
+ int64_t Mask = -1;
+
+ // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
+ for (int i = 0; i < 4; i++) {
+ // Handle the case where Reg is R0.
+ if (Reg == LoongArch::R0)
+ break;
- // Can't fold if the register has more than one use.
- if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
- return false;
- // This can point to an ORI or a LU12I.W:
- MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
- if (OffsetTail.getOpcode() == LoongArch::ORI) {
- // The offset value has non zero bits in both %hi and %lo parts.
- // Detect an ORI that feeds from a LU12I.W instruction.
- MachineOperand &OriImmOp = OffsetTail.getOperand(2);
- if (OriImmOp.getTargetFlags() != LoongArchII::MO_None)
+ // Can't fold if the register has more than one use.
+ if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
return false;
- Register OriReg = OffsetTail.getOperand(1).getReg();
- int64_t OffLo = OriImmOp.getImm();
-
- // Handle rs1 of ORI is R0.
- if (OriReg == LoongArch::R0) {
- LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
- foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
- OffsetTail.eraseFromParent();
- return true;
- }
- MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg);
- MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1);
- if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W ||
- Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None ||
- !MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg()))
+ MachineInstr *Curr = MRI->getVRegDef(Reg);
+ if (!Curr)
+ break;
+
+ switch (Curr->getOpcode()) {
+ default:
+ // Can't fold if the instruction opcode is unexpected.
return false;
- int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12);
- Offset += OffLo;
- // LU12I.W+ORI sign extends the result.
- Offset = SignExtend64<32>(Offset);
- LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
- << " " << OffsetLu12i);
- foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
- OffsetTail.eraseFromParent();
- OffsetLu12i.eraseFromParent();
- return true;
- } else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) {
- // The offset value has all zero bits in the lower 12 bits. Only LU12I.W
- // exists.
- LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
- int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
- foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
- OffsetTail.eraseFromParent();
- return true;
+ case LoongArch::ORI: {
+ MachineOperand ImmOp = Curr->getOperand(2);
+ if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
+ return false;
+ Offset += ImmOp.getImm();
+ Reg = Curr->getOperand(1).getReg();
+ Instrs.push_back(Curr);
+ break;
+ }
+ case LoongArch::LU12I_W: {
+ MachineOperand ImmOp = Curr->getOperand(1);
+ if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
+ return false;
+ Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
+ Reg = LoongArch::R0;
+ Instrs.push_back(Curr);
+ break;
+ }
+ case LoongArch::LU32I_D: {
+ MachineOperand ImmOp = Curr->getOperand(2);
+ if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
+ return false;
+ Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
+ Mask ^= 0x000FFFFF00000000ULL;
+ Reg = Curr->getOperand(1).getReg();
+ Instrs.push_back(Curr);
+ break;
+ }
+ case LoongArch::LU52I_D: {
+ MachineOperand ImmOp = Curr->getOperand(2);
+ if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
+ return false;
+ Offset += ImmOp.getImm() << 52;
+ Mask ^= 0xFFF0000000000000ULL;
+ Reg = Curr->getOperand(1).getReg();
+ Instrs.push_back(Curr);
+ break;
+ }
+ }
}
- return false;
+
+ // Can't fold if the offset is not extracted.
+ if (!Offset)
+ return false;
+
+ foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
+ LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
+ for (auto I : Instrs) {
+ LLVM_DEBUG(dbgs() << " " << *I);
+ I->eraseFromParent();
+ }
+
+ return true;
}
bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
@@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
[[fallthrough]];
case LoongArch::ADD_D:
// The offset is too large to fit in the immediate field of ADDI.
- // This can be in two forms:
- // 1) LU12I.W hi_offset followed by:
- // ORI lo_offset
- // This happens in case the offset has non zero bits in
- // both hi 20 and lo 12 bits.
- // 2) LU12I.W (offset20)
- // This happens in case the lower 12 bits of the offset are zeros.
return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
break;
}
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
index b53f94303b6ea4..9df5532d51179e 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
@@ -1100,14 +1100,11 @@ define dso_local ptr @load_addr_offset_281474439839744() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_281474439839744:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+2251795518717952)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+2251795518717952)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+2251795518717952)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+2251795518717952)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: ori $a1, $zero, 0
-; LA64-LARGE-NEXT: lu32i.d $a1, 524287
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 281474439839744)
@@ -1131,14 +1128,11 @@ define dso_local ptr @load_addr_offset_248792680471040() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_248792680471040:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+1990341443768320)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+1990341443768320)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+1990341443768320)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+1990341443768320)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: lu12i.w $a1, 502733
-; LA64-LARGE-NEXT: lu32i.d $a1, 463412
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 248792680471040)
@@ -1163,15 +1157,11 @@ define dso_local ptr @load_addr_offset_9380351707272() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_9380351707272:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+75042813658176)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+75042813658176)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+75042813658176)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+75042813658176)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: lu12i.w $a1, 279556
-; LA64-LARGE-NEXT: ori $a1, $a1, 1088
-; LA64-LARGE-NEXT: lu32i.d $a1, 17472
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 9380351707272)
@@ -1194,13 +1184,11 @@ define dso_local ptr @load_addr_offset_562949953421312() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_562949953421312:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4503599627370496)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4503599627370496)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4503599627370496)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4503599627370496)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: lu52i.d $a1, $zero, 1
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 562949953421312)
@@ -1226,16 +1214,11 @@ define dso_local ptr @load_addr_offset_614749556925924693() nounwind {
;
; LA64-LARGE-LABEL: load_addr_offset_614749556925924693:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a64+4917996455407397544)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a64+4917996455407397544)
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
-; LA64-LARGE-NEXT: lu12i.w $a1, 209666
-; LA64-LARGE-NEXT: ori $a1, $a1, 2728
-; LA64-LARGE-NEXT: lu32i.d $a1, 15288
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092
-; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)
|
SixWeining
approved these changes
Oct 23, 2024
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Closed
NoumanAmir657
pushed a commit
to NoumanAmir657/llvm-project
that referenced
this pull request
Nov 4, 2024
This PR merges large offsets into the base address loading.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This PR merges large offsets into the base address loading.