squash frac: Rewrite algorithm for speed and simplicity

RIOT-OS · Aug 14, 2018 · eac1520 · eac1520
1 parent 4d78aaa
commit eac1520
Show file tree

Hide file tree

Showing 4 changed files with 102 additions and 151 deletions.
diff --git a/sys/frac/frac.c b/sys/frac/frac.c
@@ -19,9 +19,8 @@
 #include <stdio.h>
 #include "assert.h"
 #include "frac.h"
-#include "libdivide.h"
 
-#define ENABLE_DEBUG (1)
+#define ENABLE_DEBUG (0)
 #include "debug.h"
 
 /**
@@ -86,11 +85,11 @@ uint32_t frac_long_divide(uint32_t num, uint32_t den, int *prec, uint32_t *rem)
     /* Binary long division with adaptive number of fractional bits */
     /* The result will be a Qx.y number where x is the number of bits in the
      * integer part and y = 64 - x. Similar to floating point, except the result
-     * is unsigned, and we can only represent numbers in the range 2**-64..(2**64 - 1) */
+     * is unsigned, and we can only represent numbers in the range 2**-32..(2**32 - 1) */
     assert(den); /* divide by zero */
 
     uint32_t q = 0; /* Quotient */
-    uint32_t r = 0;  /* Remainder */
+    uint64_t r = 0;  /* Remainder */
     if (prec) {
         *prec = 0;
     }
@@ -126,7 +125,7 @@ uint32_t frac_long_divide(uint32_t num, uint32_t den, int *prec, uint32_t *rem)
             break;
         }
     }
-    if (r > (den / 2)) {
+    if (r > 0) {
         ++q;
     }
     if (prec) {
@@ -152,28 +151,7 @@ void frac_init(frac_t *frac, uint32_t num, uint32_t den)
     int prec = 0;
     uint32_t rem = 0;
     frac->frac = frac_long_divide(num, den, &prec, &rem);
-    frac->shift = 32 - prec;
+    frac->shift = (sizeof(frac->frac) * 8) - prec;
     DEBUG("frac_init32: gcd = %" PRIu32 " num = %" PRIu32 " den = %" PRIu32 " frac = 0x%08" PRIx32 " shift = %02d, rem = 0x%08" PRIx32 "\n",
           gcd, num, den, frac->frac, frac->shift, rem);
-    frac->den = den;
-    frac->num = num;
-}
-
-uint32_t frac_scale(const frac_t *frac, uint32_t x)
-{
-    assert(frac);
-    uint64_t scaled = ((uint64_t)frac->frac * x) >> frac->shift;
-    DEBUG("frac_scale32: x = %" PRIu32 " * %" PRIu32 " / %" PRIu32 " = %" PRIu64 "\n",
-        x, frac->num, frac->den, scaled);
-    //~ /* integer quotient */
-    //~ uint64_t quot = libdivide_u64_do(x, &frac->div);
-    //~ /* remainder */
-    //~ uint64_t rem = x - (quot * frac->den);
-    //~ /* quot * frac->num may wrap around when num > den and x is "big" */
-    //~ /* rem * frac->num will never wrap around as long as both num and den are at
-     //~ * most 32 bits wide, u32 x u32 -> u64 */
-    //~ uint64_t scaled = quot * frac->num + libdivide_u64_do(rem * frac->num, &frac->div);
-    //~ DEBUG("frac_scale32: x = %" PRIu64 " quot = %" PRIu64 " rem = %" PRIu64
-          //~ " scaled = %" PRIu64 "\n", x, quot, rem, scaled);
-    return scaled;
 }
diff --git a/sys/include/frac.h b/sys/include/frac.h
@@ -20,6 +20,16 @@
  * Precomputing the frac_t values can be done via the application found in
  * `tests/frac-config` in the RIOT tree.
  *
+ * ### Numeric precision
+ *
+ * The algorithm will under certain circumstances give an incorrectly rounded
+ * result, more precisely, the result may sometimes be rounded up instead of
+ * rounded down when the product in the numerator, @$p = x \cdot num@$, would
+ * result in @$p >= 2^{31}@$. Fortunately, the relative error of this rounding
+ * mistake is small.
+ *
+ * This tradeoff is a design choice to make the algorithm faster.
+ *
  * @see       Libdivide homepage: http://libdivide.com/
  *
  * @file
@@ -33,7 +43,6 @@
 
 #include <assert.h>
 #include <stdint.h>
-#include "libdivide.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -43,8 +52,6 @@ extern "C" {
  * @brief   frac descriptor for fraction consisting of two 32 bit integers
  */
 typedef struct {
-    uint32_t num; /**< numerator */
-    uint32_t den; /**< denominator, needed for modulo operation */
     uint32_t frac; /**< fraction */
     uint8_t shift; /**< exponent */
 } frac_t;
@@ -55,7 +62,7 @@ typedef struct {
  * This function computes the mathematical parameters used by the frac algorithm.
  *
  * @note Be extra careful if @p num > @p den, the result from @ref frac_scale
- * may not fit in a 64 bit integer if @c x is very big.
+ * may not fit in a 32 bit integer if @c x is big.
  *
  * @pre @p den must not be 0
  *
@@ -66,17 +73,21 @@ typedef struct {
 void frac_init(frac_t *frac, uint32_t num, uint32_t den);
 
 /**
- * @brief Scale a 64 bit integer by a 32/32 integer fraction
+ * @brief   Scale a 32 bit integer by a 32/32 rational number
  *
- * @pre x * frac < 2**64, i.e. the result fits in a 64 bit integer
+ * @pre x * frac < 2**32, i.e. the result fits in a 32 bit integer
  *
  * @param[in]   frac  scaling fraction
  * @param[in]   x     unscaled integer
  *
  * @return      x * frac, avoiding truncation
- * @return      a wrong result if x * frac > 2**64
+ * @return      a wrong result if x * frac > 2**32
  */
-uint32_t frac_scale(const frac_t *frac, uint32_t x);
+static inline uint32_t frac_scale(const frac_t *frac, uint32_t x)
+{
+    uint32_t scaled = ((uint64_t)frac->frac * x) >> frac->shift;
+    return scaled;
+}
 
 #ifdef __cplusplus
 }

diff --git a/tests/bench_frac_div/main.c b/tests/bench_frac_div/main.c
@@ -43,12 +43,12 @@ static uint64_t buf[TEST_NUMOF];
 
 #define ARRAY_LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
 
-/* Apply div_u64_by_15625div512 on all elements of buf */
-uint32_t bench_div_u64_by_15625div512(uint64_t *buf, size_t nelem)
+/* Apply div_u32_by_15625div512 on all elements of buf */
+uint32_t bench_div_u32_by_15625div512(uint64_t *buf, size_t nelem)
 {
     unsigned time_start = timer_read(TIM_REF_DEV);
     for (unsigned k = 0; k < nelem; ++k) {
-        buf[k] = div_u64_by_15625div512(buf[k]);
+        buf[k] = div_u32_by_15625div512(buf[k]);
     }
     unsigned time_end = timer_read(TIM_REF_DEV);
     return (time_end - time_start);
@@ -136,38 +136,12 @@ int main(void)
         puts("Error initializing timer!");
         while(1) {}
     }
-    for (unsigned k = 0; k <= 12; ++k) {
-        for (unsigned j = 0; j <= 12; ++j) {
-            for (unsigned den = 1; den <= 12; ++den) {
-                int prec = 0;
-                uint32_t rem = 0;
-                uint64_t q = frac_long_divide(k * j, den, &rem, &prec);
-                uint64_t i = 0;
-                uint64_t d = 0;
-                if (prec > 0) {
-                    i = (q >> (64 - prec));
-                    d = (q << prec);
-                }
-                else if (prec == 0) {
-                    d = q;
-                }
-                else {
-                    d = (q >> -prec);
-                }
-                unsigned shift = 32 - prec;
-                uint32_t s = (q >> 32);
-                uint32_t r = ((uint64_t)den * s) >> shift;
-                printf("(%2u x %2u) / %2u = %3" PRIu64 ".%016" PRIx64 " (0x%016" PRIx64 ", %d, %" PRIx32 "), %2" PRIu32 "\n", k, j, den, i, d, q, prec, rem, r);
-            }
-        }
-    }
-    //~ while(1) {}
     uint64_t seed = 12345;
     uint32_t variation = 4321;
     while (1) {
         ++seed;
         fill_buf(buf, ARRAY_LEN(buf), seed);
-        uint32_t time_div = bench_div_u64_by_15625div512(buf, ARRAY_LEN(buf));
+        uint32_t time_div = bench_div_u32_by_15625div512(buf, ARRAY_LEN(buf));
         fill_buf(buf, ARRAY_LEN(buf), seed);
         uint32_t time_frac = bench_frac(buf, ARRAY_LEN(buf), 512, 15625);
         fill_buf(buf, ARRAY_LEN(buf), seed);

diff --git a/tests/unittests/tests-frac/tests-frac.c b/tests/unittests/tests-frac/tests-frac.c
@@ -17,39 +17,49 @@
 #include "debug.h"
 
 static const uint32_t u32_fraction_operands[] = {
-    1000000l,
-    1l,
-    2l,
-    5l,
-    10l,
-    100l,
-    1000l,
-    1000000l,
-    2000000l,
-    4000000l,
-    8000000l,
-    16000000l,
-    32000000l,
-    32768l,
-    9600l,
-    38400l,
-    115200l,
-    230400l,
-    460800l,
-    921600l,
-    4096l,
-    15625l,
-    1048576l,
-    0x10000000l,
-    0x1000000l,
-    1000000000l,
-    999999733l,  /* <- prime */
-    512000000l,
-    1024000000l,
-    0x40000000l,
+    1ul,
+    2ul,
+    5ul,
+    10ul,
+    100ul,
+    1000ul,
+    1000000ul,
+    2000000ul,
+    4000000ul,
+    8000000ul,
+    16000000ul,
+    32000000ul,
+    641ul,
+    274177ul,
+    32768ul,
+    9600ul,
+    38400ul,
+    115200ul,
+    230400ul,
+    460800ul,
+    921600ul,
+    4096ul,
+    15625ul,
+    1048576ul,
+    0x10000000ul,
+    0x1000000ul,
+    1000000000ul,
+    999999733ul,  /* <- prime */
+    512000000ul,
+    1024000000ul,
+    0x40000000ul,
+    0x80000000ul,
+    0xc0000000ul,
+    0xe0000000ul,
+    0xf0000000ul,
+    0xfffffff0ul,
+    0xfffffff8ul,
+    0xfffffffcul,
+    0xfffffffeul,
+    0xfffffffful,
 };
 
-static const uint64_t u64_test_values[] = {
+static const uint32_t u32_test_values[] = {
     0ul,
     1ul,
     10ul,
@@ -74,75 +84,53 @@ static const uint64_t u64_test_values[] = {
     100000000ul,
     2100012683ul,            /* <- prime */
     0x7ffffffful,
-#if 0
-    11111111111ull,
-    0x100000000ull,
-    16383999997ull,
-    16383999998ull,
-    16383999999ull,
-    16384000000ull,
-    1048575999807ull,
-    1048575999808ull,
-    1048575999809ull,
-    0xabcdef01ull,
-    0xabcdef012ull,
-    0xabcdef0123ull,
-    0xabcdef01234ull,
-    0xabcdef012345ull,
-    0xabcdef0123456ull,
-    0xabcdef01234567ull,
-    0x111111111111111ull,
-    0xffffffffffffeeull,
-    0x777777777777777ull,
-    0x1111111111111111ull,
-    0x7fffffffffffffffull,
-    0x8000000000000000ull,
-#endif
+    0x80000000ul,
+    0xc0000000ul,
+    0xe0000000ul,
+    0xf0000000ul,
+    0xfffffff0ul,
+    0xfffffff8ul,
+    0xfffffffcul,
+    0xfffffffeul,
+    0xfffffffful,
 };
 
 #define N_U32_OPERANDS (sizeof(u32_fraction_operands) / sizeof(u32_fraction_operands[0]))
-#define N_U64_VALS (sizeof(u64_test_values) / sizeof(u64_test_values[0]))
+#define N_U32_VALS (sizeof(u32_test_values) / sizeof(u32_test_values[0]))
 
 static void test_frac_scale32(void)
 {
     for (unsigned k = 0; k < N_U32_OPERANDS; ++k) {
         for (unsigned j = 0; j < N_U32_OPERANDS; ++j) {
-            int32_t num = u32_fraction_operands[j];
-            int32_t den = u32_fraction_operands[k];
+            uint32_t num = u32_fraction_operands[j];
+            uint32_t den = u32_fraction_operands[k];
             frac_t frac;
             frac_init(&frac, num, den);
-            for (unsigned i = 0; i < N_U64_VALS; i++) {
-                DEBUG("Scaling %" PRIu64 " by (%" PRIu32 " / %" PRIu32 "), ",
-                    u64_test_values[i], num, den);
-                /* Using 128 bit intermediate result by reusing libdivide functions.
-                 * For a more decoupled verification, this should use
-                 * 128 bit integers directly, but that is only supported by GCC
-                 * on 64 bit platforms, which would prevent us from running this
-                 * test on actual MCUs. We assume that
-                 * libdivide__mullhi_u64,
-                 * libdivide_128_div_64_to_64
-                 * have been verified by other tests in the upstream test suite.
-                 */
-                /* high 64 bits of intermediate result */
-                uint64_t hi = libdivide__mullhi_u64(num, u64_test_values[i]);
-                /* low 64 bits of intermediate result */
-                uint64_t lo = u64_test_values[i] * num;
-                /* remainder from division, will be set to (uint64_t) -1 if the
-                 * result does not fit */
-                uint64_t rem = 0;
-                /* compute 128/64 -> 64 division */
-                uint64_t expected = libdivide_128_div_64_to_64(hi, lo, den, &rem);
+            for (unsigned i = 0; i < N_U32_VALS; i++) {
+                DEBUG("Scaling %" PRIu32 " by (%" PRIu32 " / %" PRIu32 "), ",
+                    u32_test_values[i], num, den);
+                /* intermediate result */
+                volatile uint64_t tmp = (uint64_t)u32_test_values[i] * num;
+                volatile uint64_t expected = tmp / (uint64_t)den;
                 if (expected > 0xfffffffful) {
+                    /* result will not fit */
                     continue;
                 }
-                if (rem == (uint64_t) -1) {
-                    /* Expected value does not fit in a 64 bit unsigned integer */
-                    DEBUG("overflow, skipping\n");
-                    continue;
-                }
-                uint64_t actual = frac_scale(&frac, u64_test_values[i]);
+                uint32_t actual = frac_scale(&frac, u32_test_values[i]);
                 if ((uint32_t)expected != actual) {
-                    printf("expect %" PRIu64 ", actual %" PRIu64 "\n",  expected, actual);
+                    int32_t diff = actual - expected;
+                    DEBUG("%" PRIu32 " * (%" PRIu32 " / %" PRIu32 ")"
+                        " tmp %" PRIu64 " expect %" PRIu32 ", actual %" PRIu32 ", diff = %" PRId32 "\n",
+                        u32_test_values[i], num, den, tmp, (uint32_t)expected,
+                        actual, diff);
+                    if (tmp >= (1ul << 31)) {
+                        /* The frac algorithm sacrifices accuracy for speed,
+                         * some large numbers will be incorrectly rounded,
+                         * resulting in small differences here.. */
+                        if (diff < 3) {
+                            continue;
+                        }
+                    }
                 }
                 TEST_ASSERT_EQUAL_INT((uint32_t)expected, (uint32_t)actual);
             }