halide · rootjalex · Apr 30, 2024 · Aug 23, 2023 · Aug 24, 2023 · Aug 24, 2023
diff --git a/src/Bounds.cpp b/src/Bounds.cpp
@@ -1517,15 +1517,28 @@ class Bounds : public IRVisitor {
                 result.include(arg_bounds.get(i));
             }
             interval = result;
-        } else if (op->is_intrinsic(Call::widen_right_add)) {
-            Expr add = Add::make(op->args[0], cast(op->args[0].type(), op->args[1]));
-            add.accept(this);
-        } else if (op->is_intrinsic(Call::widen_right_sub)) {
-            Expr sub = Sub::make(op->args[0], cast(op->args[0].type(), op->args[1]));
-            sub.accept(this);
-        } else if (op->is_intrinsic(Call::widen_right_mul)) {
-            Expr mul = Mul::make(op->args[0], cast(op->args[0].type(), op->args[1]));
-            mul.accept(this);
+        } else if (op->is_intrinsic({Call::widening_add,
+                                     Call::widening_mul,
+                                     Call::widening_shift_left,
+                                     Call::widening_shift_right,
+                                     Call::widening_sub,
+                                     Call::widen_right_add,
+                                     Call::widen_right_sub,
+                                     Call::widen_right_mul,
+                                     // TODO: the intrinsics below should not go through the optimal lowering,
+                                     // because that lowering is harder for bounds inference to reason about.
+                                     Call::rounding_halving_add,
+                                     Call::halving_add,
+                                     Call::saturating_add,
+                                     Call::saturating_sub,
+                                     Call::halving_sub,
+                                     Call::rounding_shift_left,
+                                     Call::rounding_shift_right,
+                                     Call::mul_shift_right,
+                                     Call::rounding_mul_shift_right})) {
+            Expr a = lower_intrinsic(op);
+            internal_assert(a.defined());
+            a.accept(this);
         } else if (op->call_type == Call::Halide) {
             bounds_of_func(op->name, op->value_index, op->type);
         } else {

diff --git a/src/CodeGen_X86.cpp b/src/CodeGen_X86.cpp
@@ -1,3 +1,4 @@
+#include "Bounds.h"
 #include "CodeGen_Internal.h"
 #include "CodeGen_Posix.h"
 #include "ConciseCasts.h"
@@ -488,7 +489,6 @@ void CodeGen_X86::visit(const Select *op) {
 }
 
 void CodeGen_X86::visit(const Cast *op) {
-
     if (!op->type.is_vector()) {
         // We only have peephole optimizations for vectors in here.
         CodeGen_Posix::visit(op);
@@ -501,7 +501,7 @@ void CodeGen_X86::visit(const Cast *op) {
     };
 
     // clang-format off
-    static Pattern patterns[] = {
+    static const Pattern patterns[] = {
         // This isn't rounding_multiply_quantzied(i16, i16, 15) because it doesn't
         // saturate the result.
         {"pmulhrs", i16(rounding_shift_right(widening_mul(wild_i16x_, wild_i16x_), 15))},
@@ -611,7 +611,7 @@ void CodeGen_X86::visit(const Call *op) {
     };
 
     // clang-format off
-    static Pattern patterns[] = {
+    static const Pattern patterns[] = {
         {"pmulh", mul_shift_right(wild_i16x_, wild_i16x_, 16)},
         {"pmulh", mul_shift_right(wild_u16x_, wild_u16x_, 16)},
         {"saturating_narrow", i16_sat(wild_i32x_)},
@@ -631,6 +631,37 @@ void CodeGen_X86::visit(const Call *op) {
         }
     }
 
+    // clang-format off
+    static const Pattern reinterpret_patterns[] = {
+        {"saturating_narrow", i16_sat(wild_u32x_)},
+        {"saturating_narrow", u16_sat(wild_u32x_)},
+        {"saturating_narrow", i8_sat(wild_u16x_)},
+        {"saturating_narrow", u8_sat(wild_u16x_)},
+    };
+    // clang-format on
+
+    // Search for saturating casts of an unsigned value whose constant upper
+    // bound fits in the signed type of the same width; such a value can be
+    // reinterpreted as signed so the existing saturating_narrow patterns apply.
+    for (const auto &pattern : reinterpret_patterns) {
+        if (expr_match(pattern.pattern, op, matches)) {
+            const Expr &expr = matches[0];
+            Expr upper_bound = find_constant_bound(expr, Direction::Upper);
+            if (const uint64_t *bound = as_const_uint(upper_bound)) {
+                Type t = matches[0].type();
+                if (*bound <= (uint64_t)max_int(t.bits())) {
+                    // Can safely reinterpret to signed integer.
+                    matches[0] = cast(t.with_code(halide_type_int), matches[0]);
+                    value = call_overloaded_intrin(op->type, pattern.intrin, matches);
+                    if (value) {
+                        return;
+                    }
+                }
+            }
+            break;
+        }
+    }
+
     static const vector<pair<Expr, Expr>> cast_rewrites = {
         // Some double-narrowing saturating casts can be better expressed as
         // combinations of single-narrowing saturating casts.

diff --git a/src/Type.cpp b/src/Type.cpp
@@ -6,7 +6,6 @@ namespace Halide {
 
 using std::ostringstream;
 
-namespace {
 uint64_t max_uint(int bits) {
     uint64_t max_val = 0xffffffffffffffffULL;
     return max_val >> (64 - bits);
@@ -21,8 +20,6 @@ int64_t min_int(int bits) {
     return -max_int(bits) - 1;
 }
 
-}  // namespace
-
 /** Return an expression which is the maximum value of this type */
 Halide::Expr Type::max() const {
     if (is_vector()) {

diff --git a/src/Type.h b/src/Type.h
@@ -561,6 +561,13 @@ inline Type type_of() {
 /** Halide type to a C++ type */
 std::string type_to_c_type(Type type, bool include_space, bool c_plus_plus = true);
 
+/** Returns the maximum value representable by an unsigned integer of the given bit width. */
+uint64_t max_uint(int bits);
+/** Returns the maximum value representable by a signed integer of the given bit width. */
+int64_t max_int(int bits);
+/** Returns the minimum value representable by a signed integer of the given bit width. */
+int64_t min_int(int bits);
+
 }  // namespace Halide
 
 #endif