Skip to content

Commit

Permalink
Outsmart the LLVM optimizer (halide#8073)
Browse files Browse the repository at this point in the history
The old definitions of bool_1, bool_2, bool_3 in simd_op_check_x86 (etc) all referred to the same entry in in_f32; as of llvm/llvm-project#76367, the LLVM optimizer is smart enough to realize that (eg) bool1 != bool2 by construction, and optimizes away the code that tests their conditions, such as the one for andps and orps. Initing them from different locations is enough to outsmart the compiler.

(bug was only noticed in the x86 test, but I updated the other tests to guard against future improvements there too.)
  • Loading branch information
steven-johnson authored and ardier committed Mar 3, 2024
1 parent ce4cad5 commit ab172b2
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion test/correctness/simd_op_check_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class SimdOpCheckARM : public SimdOpCheckTest {
Expr u32_1 = in_u32(x), u32_2 = in_u32(x + 16), u32_3 = in_u32(x + 32);
Expr i64_1 = in_i64(x), i64_2 = in_i64(x + 16), i64_3 = in_i64(x + 32);
Expr u64_1 = in_u64(x), u64_2 = in_u64(x + 16), u64_3 = in_u64(x + 32);
Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_1 < -0.3f), bool_3 = (f32_1 != -0.34f);
Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_2 < -0.3f), bool_3 = (f32_3 != -0.34f);

// Table copied from the Cortex-A9 TRM.

Expand Down
2 changes: 1 addition & 1 deletion test/correctness/simd_op_check_hvx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class SimdOpCheckHVX : public SimdOpCheckTest {
Expr u32_1 = in_u32(x), u32_2 = in_u32(x + 16), u32_3 = in_u32(x + 32);
Expr i64_1 = in_i64(x), i64_2 = in_i64(x + 16), i64_3 = in_i64(x + 32);
Expr u64_1 = in_u64(x), u64_2 = in_u64(x + 16), u64_3 = in_u64(x + 32);
Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_1 < -0.3f), bool_3 = (f32_1 != -0.34f);
Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_2 < -0.3f), bool_3 = (f32_3 != -0.34f);

constexpr int hvx_width = 128;

Expand Down
2 changes: 1 addition & 1 deletion test/correctness/simd_op_check_powerpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class SimdOpCheckPowerPC : public SimdOpCheckTest {
Expr u32_1 = in_u32(x), u32_2 = in_u32(x + 16), u32_3 = in_u32(x + 32);
Expr i64_1 = in_i64(x), i64_2 = in_i64(x + 16), i64_3 = in_i64(x + 32);
Expr u64_1 = in_u64(x), u64_2 = in_u64(x + 16), u64_3 = in_u64(x + 32);
// Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_1 < -0.3f), bool_3 = (f32_1 != -0.34f);
// Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_2 < -0.3f), bool_3 = (f32_3 != -0.34f);

// Basic AltiVec SIMD instructions.
for (int w = 1; w <= 4; w++) {
Expand Down
2 changes: 1 addition & 1 deletion test/correctness/simd_op_check_wasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class SimdOpCheckWASM : public SimdOpCheckTest {
Expr u32_1 = in_u32(x), u32_2 = in_u32(x + 16), u32_3 = in_u32(x + 32);
Expr i64_1 = in_i64(x), i64_2 = in_i64(x + 16), i64_3 = in_i64(x + 32);
Expr u64_1 = in_u64(x), u64_2 = in_u64(x + 16), u64_3 = in_u64(x + 32);
Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_1 < -0.3f), bool_3 = (f32_1 != -0.34f);
Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_2 < -0.3f), bool_3 = (f32_3 != -0.34f);

check("f32.sqrt", 1, sqrt(f32_1));
check("f32.min", 1, min(f32_1, f32_2));
Expand Down
2 changes: 1 addition & 1 deletion test/correctness/simd_op_check_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class SimdOpCheckX86 : public SimdOpCheckTest {
Expr u32_1 = in_u32(x), u32_2 = in_u32(x + 16), u32_3 = in_u32(x + 32);
Expr i64_1 = in_i64(x), i64_2 = in_i64(x + 16), i64_3 = in_i64(x + 32);
Expr u64_1 = in_u64(x), u64_2 = in_u64(x + 16), u64_3 = in_u64(x + 32);
Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_1 < -0.3f), bool_3 = (f32_1 != -0.34f);
Expr bool_1 = (f32_1 > 0.3f), bool_2 = (f32_2 < -0.3f), bool_3 = (f32_3 != -0.34f);

// MMX and SSE1 (in 64 and 128 bits)
for (int w = 1; w <= 4; w++) {
Expand Down

0 comments on commit ab172b2

Please sign in to comment.