[X86] Enable TuningSlowDivide64 on Barcelona/Bobcat/Bulldozer/Ryzen Families (#91277)

Despite most AMD CPUs having lower latency for i64 divisions that converge early, we are still better off testing for values representable as i32 and performing an i32 division where possible.
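
As a rough illustration (my own C sketch, not code from this patch or from LLVM), the "idivq-to-divl" bypass amounts to the runtime check below; the same test also covers signed division, since operands with their upper 32 bits clear are non-negative, so the signed and unsigned results coincide.

    #include <stdint.h>

    /* Illustrative only: the shape of the bypass the X86 backend emits
     * when TuningSlowDivide64 ("idivq-to-divl") is set. If neither
     * operand uses its upper 32 bits, a 32-bit divl gives the same
     * result as a full 64-bit divq, and is cheaper on these CPUs. */
    static uint64_t div_bypass(uint64_t a, uint64_t b) {
        if (((a | b) >> 32) == 0)              /* both values fit in 32 bits */
            return (uint32_t)a / (uint32_t)b;  /* fast path: 32-bit divl     */
        return a / b;                          /* slow path: 64-bit divq     */
    }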

All AMD CPUs appear to have been missed when we added the "idivq-to-divl" attribute - this patch now matches Intel CPU behaviour (and the x86-64/v2/v3/v4 levels).

Unfortunately, the difference in code scheduling between CPUs means I've had to stop using the update_llc_test_checks.py script and instead use old-fashioned CHECK-DAG checks for the divl/divq pairs.
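
As a reduced excerpt (copied from the updated checks in the diff below, with the test body elided), the CHECK-DAG lines let the register-setup instructions match in any order, whereas the previous autogenerated CHECK-NEXT lines pinned one particular schedule:

    ; SLOW-DIVQ-DAG:  movq %rdi, %rax
    ; SLOW-DIVQ-DAG:  movq %rdi, %rcx
    ; SLOW-DIVQ-DAG:  orq %rsi, %rcx
    ; SLOW-DIVQ-DAG:  shrq $32, %rcx
    ; SLOW-DIVQ-NEXT: je .LBB0_1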

Fixes #90985
RKSimon authored May 9, 2024
1 parent e0d8dbc commit 8b400de
Showing 2 changed files with 52 additions and 48 deletions.
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86.td
@@ -1350,6 +1350,7 @@ def ProcessorFeatures {
FeatureCMOV,
FeatureX86_64];
list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
TuningSlowDivide64,
TuningSlowSHLD,
TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
@@ -1372,6 +1373,7 @@ def ProcessorFeatures {
list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
TuningFastScalarShiftMasks,
TuningFastVectorShiftMasks,
TuningSlowDivide64,
TuningSlowSHLD,
TuningFastImm16,
TuningSBBDepBreaking,
@@ -1396,6 +1398,7 @@
TuningFastMOVBE,
TuningFastImm16,
TuningSBBDepBreaking,
TuningSlowDivide64,
TuningSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1420,6 +1423,7 @@
FeatureLWP,
FeatureLAHFSAHF64];
list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
TuningSlowDivide64,
TuningFast11ByteNOP,
TuningFastScalarShiftMasks,
TuningBranchFusion,
@@ -1500,6 +1504,7 @@ def ProcessorFeatures {
TuningFastVariablePerLaneShuffle,
TuningFastMOVBE,
TuningFastImm16,
TuningSlowDivide64,
TuningSlowSHLD,
TuningSBBDepBreaking,
TuningInsertVZEROUPPER,
95 changes: 47 additions & 48 deletions llvm/test/CodeGen/X86/bypass-slow-division-64.ll
@@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Check that 64-bit division is bypassed correctly.
; RUN: llc < %s -mtriple=x86_64-- -mattr=-idivq-to-divl | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mattr=+idivq-to-divl | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
@@ -13,17 +12,17 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=alderlake | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; AMD
; RUN: llc < %s -mtriple=x86_64-- -mcpu=barcelona | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver1 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=barcelona | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver1 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ

; Additional tests for 64-bit divide bypass

@@ -41,18 +40,18 @@ define i64 @sdiv_quotient(i64 %a, i64 %b) nounwind {
;
; SLOW-DIVQ-LABEL: sdiv_quotient:
; SLOW-DIVQ: # %bb.0:
; SLOW-DIVQ-NEXT: movq %rdi, %rax
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
; SLOW-DIVQ-NEXT: shrq $32, %rcx
; SLOW-DIVQ-DAG: movq %rdi, %rax
; SLOW-DIVQ-DAG: movq %rdi, %rcx
; SLOW-DIVQ-DAG: orq %rsi, %rcx
; SLOW-DIVQ-DAG: shrq $32, %rcx
; SLOW-DIVQ-NEXT: je .LBB0_1
; SLOW-DIVQ-NEXT: # %bb.2:
; SLOW-DIVQ-NEXT: cqto
; SLOW-DIVQ-NEXT: idivq %rsi
; SLOW-DIVQ-NEXT: retq
; SLOW-DIVQ-NEXT: .LBB0_1:
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-DAG: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divl %esi
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax def $rax
; SLOW-DIVQ-NEXT: retq
@@ -93,19 +92,19 @@ define i64 @sdiv_remainder(i64 %a, i64 %b) nounwind {
;
; SLOW-DIVQ-LABEL: sdiv_remainder:
; SLOW-DIVQ: # %bb.0:
; SLOW-DIVQ-NEXT: movq %rdi, %rax
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
; SLOW-DIVQ-NEXT: shrq $32, %rcx
; SLOW-DIVQ-DAG: movq %rdi, %rax
; SLOW-DIVQ-DAG: movq %rdi, %rcx
; SLOW-DIVQ-DAG: orq %rsi, %rcx
; SLOW-DIVQ-DAG: shrq $32, %rcx
; SLOW-DIVQ-NEXT: je .LBB3_1
; SLOW-DIVQ-NEXT: # %bb.2:
; SLOW-DIVQ-NEXT: cqto
; SLOW-DIVQ-NEXT: idivq %rsi
; SLOW-DIVQ-NEXT: movq %rdx, %rax
; SLOW-DIVQ-NEXT: retq
; SLOW-DIVQ-NEXT: .LBB3_1:
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-DAG: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divl %esi
; SLOW-DIVQ-NEXT: movl %edx, %eax
; SLOW-DIVQ-NEXT: retq
@@ -148,19 +147,19 @@ define i64 @sdiv_quotient_and_remainder(i64 %a, i64 %b) nounwind {
;
; SLOW-DIVQ-LABEL: sdiv_quotient_and_remainder:
; SLOW-DIVQ: # %bb.0:
; SLOW-DIVQ-NEXT: movq %rdi, %rax
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
; SLOW-DIVQ-NEXT: shrq $32, %rcx
; SLOW-DIVQ-DAG: movq %rdi, %rax
; SLOW-DIVQ-DAG: movq %rdi, %rcx
; SLOW-DIVQ-DAG: orq %rsi, %rcx
; SLOW-DIVQ-DAG: shrq $32, %rcx
; SLOW-DIVQ-NEXT: je .LBB6_1
; SLOW-DIVQ-NEXT: # %bb.2:
; SLOW-DIVQ-NEXT: cqto
; SLOW-DIVQ-NEXT: idivq %rsi
; SLOW-DIVQ-NEXT: addq %rdx, %rax
; SLOW-DIVQ-NEXT: retq
; SLOW-DIVQ-NEXT: .LBB6_1:
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-DAG: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divl %esi
; SLOW-DIVQ-NEXT: # kill: def $edx killed $edx def $rdx
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax def $rax
@@ -214,18 +213,18 @@ define i64 @udiv_quotient(i64 %a, i64 %b) nounwind {
;
; SLOW-DIVQ-LABEL: udiv_quotient:
; SLOW-DIVQ: # %bb.0:
; SLOW-DIVQ-NEXT: movq %rdi, %rax
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
; SLOW-DIVQ-NEXT: shrq $32, %rcx
; SLOW-DIVQ-DAG: movq %rdi, %rax
; SLOW-DIVQ-DAG: movq %rdi, %rcx
; SLOW-DIVQ-DAG: orq %rsi, %rcx
; SLOW-DIVQ-DAG: shrq $32, %rcx
; SLOW-DIVQ-NEXT: je .LBB9_1
; SLOW-DIVQ-NEXT: # %bb.2:
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divq %rsi
; SLOW-DIVQ-NEXT: retq
; SLOW-DIVQ-NEXT: .LBB9_1:
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-DAG: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divl %esi
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax def $rax
; SLOW-DIVQ-NEXT: retq
@@ -266,19 +265,19 @@ define i64 @udiv_remainder(i64 %a, i64 %b) nounwind {
;
; SLOW-DIVQ-LABEL: udiv_remainder:
; SLOW-DIVQ: # %bb.0:
; SLOW-DIVQ-NEXT: movq %rdi, %rax
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
; SLOW-DIVQ-NEXT: shrq $32, %rcx
; SLOW-DIVQ-DAG: movq %rdi, %rax
; SLOW-DIVQ-DAG: movq %rdi, %rcx
; SLOW-DIVQ-DAG: orq %rsi, %rcx
; SLOW-DIVQ-DAG: shrq $32, %rcx
; SLOW-DIVQ-NEXT: je .LBB12_1
; SLOW-DIVQ-NEXT: # %bb.2:
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divq %rsi
; SLOW-DIVQ-NEXT: movq %rdx, %rax
; SLOW-DIVQ-NEXT: retq
; SLOW-DIVQ-NEXT: .LBB12_1:
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-DAG: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divl %esi
; SLOW-DIVQ-NEXT: movl %edx, %eax
; SLOW-DIVQ-NEXT: retq
@@ -321,19 +320,19 @@ define i64 @udiv_quotient_and_remainder(i64 %a, i64 %b) nounwind {
;
; SLOW-DIVQ-LABEL: udiv_quotient_and_remainder:
; SLOW-DIVQ: # %bb.0:
; SLOW-DIVQ-NEXT: movq %rdi, %rax
; SLOW-DIVQ-NEXT: movq %rdi, %rcx
; SLOW-DIVQ-NEXT: orq %rsi, %rcx
; SLOW-DIVQ-NEXT: shrq $32, %rcx
; SLOW-DIVQ-DAG: movq %rdi, %rax
; SLOW-DIVQ-DAG: movq %rdi, %rcx
; SLOW-DIVQ-DAG: orq %rsi, %rcx
; SLOW-DIVQ-DAG: shrq $32, %rcx
; SLOW-DIVQ-NEXT: je .LBB15_1
; SLOW-DIVQ-NEXT: # %bb.2:
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divq %rsi
; SLOW-DIVQ-NEXT: addq %rdx, %rax
; SLOW-DIVQ-NEXT: retq
; SLOW-DIVQ-NEXT: .LBB15_1:
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-NEXT: xorl %edx, %edx
; SLOW-DIVQ-DAG: # kill: def $eax killed $eax killed $rax
; SLOW-DIVQ-DAG: xorl %edx, %edx
; SLOW-DIVQ-NEXT: divl %esi
; SLOW-DIVQ-NEXT: # kill: def $edx killed $edx def $rdx
; SLOW-DIVQ-NEXT: # kill: def $eax killed $eax def $rax
