Skip to content

Commit

Permalink
squash frac: Rewrite algorithm for speed and simplicity
Browse files Browse the repository at this point in the history
  • Loading branch information
Joakim Nohlgård committed Aug 14, 2018
1 parent 4d78aaa commit eac1520
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 151 deletions.
32 changes: 5 additions & 27 deletions sys/frac/frac.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@
#include <stdio.h>
#include "assert.h"
#include "frac.h"
#include "libdivide.h"

#define ENABLE_DEBUG (1)
#define ENABLE_DEBUG (0)
#include "debug.h"

/**
Expand Down Expand Up @@ -86,11 +85,11 @@ uint32_t frac_long_divide(uint32_t num, uint32_t den, int *prec, uint32_t *rem)
/* Binary long division with adaptive number of fractional bits */
/* The result will be a Qx.y number where x is the number of bits in the
* integer part and y = 64 - x. Similar to floating point, except the result
* is unsigned, and we can only represent numbers in the range 2**-64..(2**64 - 1) */
* is unsigned, and we can only represent numbers in the range 2**-32..(2**32 - 1) */
assert(den); /* divide by zero */

uint32_t q = 0; /* Quotient */
uint32_t r = 0; /* Remainder */
uint64_t r = 0; /* Remainder */
if (prec) {
*prec = 0;
}
Expand Down Expand Up @@ -126,7 +125,7 @@ uint32_t frac_long_divide(uint32_t num, uint32_t den, int *prec, uint32_t *rem)
break;
}
}
if (r > (den / 2)) {
if (r > 0) {
++q;
}
if (prec) {
Expand All @@ -152,28 +151,7 @@ void frac_init(frac_t *frac, uint32_t num, uint32_t den)
int prec = 0;
uint32_t rem = 0;
frac->frac = frac_long_divide(num, den, &prec, &rem);
frac->shift = 32 - prec;
frac->shift = (sizeof(frac->frac) * 8) - prec;
DEBUG("frac_init32: gcd = %" PRIu32 " num = %" PRIu32 " den = %" PRIu32 " frac = 0x%08" PRIx32 " shift = %02d, rem = 0x%08" PRIx32 "\n",
gcd, num, den, frac->frac, frac->shift, rem);
frac->den = den;
frac->num = num;
}

uint32_t frac_scale(const frac_t *frac, uint32_t x)
{
assert(frac);
uint64_t scaled = ((uint64_t)frac->frac * x) >> frac->shift;
DEBUG("frac_scale32: x = %" PRIu32 " * %" PRIu32 " / %" PRIu32 " = %" PRIu64 "\n",
x, frac->num, frac->den, scaled);
//~ /* integer quotient */
//~ uint64_t quot = libdivide_u64_do(x, &frac->div);
//~ /* remainder */
//~ uint64_t rem = x - (quot * frac->den);
//~ /* quot * frac->num may wrap around when num > den and x is "big" */
//~ /* rem * frac->num will never wrap around as long as both num and den are at
//~ * most 32 bits wide, u32 x u32 -> u64 */
//~ uint64_t scaled = quot * frac->num + libdivide_u64_do(rem * frac->num, &frac->div);
//~ DEBUG("frac_scale32: x = %" PRIu64 " quot = %" PRIu64 " rem = %" PRIu64
//~ " scaled = %" PRIu64 "\n", x, quot, rem, scaled);
return scaled;
}
27 changes: 19 additions & 8 deletions sys/include/frac.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@
* Precomputing the frac_t values can be done via the application found in
* `tests/frac-config` in the RIOT tree.
*
* ### Numeric precision
*
* The algorithm will under certain circumstances give an incorrectly rounded
* result, more precisely, the result may sometimes be rounded up instead of
* rounded down when the product in the numerator, @$p = x \cdot num@$, would
* result in @$p >= 2^{31}@$. Fortunately, the relative error of this rounding
* mistake is small.
*
* This tradeoff is a design choice to make the algorithm faster.
*
* @see Libdivide homepage: http://libdivide.com/
*
* @file
Expand All @@ -33,7 +43,6 @@

#include <assert.h>
#include <stdint.h>
#include "libdivide.h"

#ifdef __cplusplus
extern "C" {
Expand All @@ -43,8 +52,6 @@ extern "C" {
* @brief frac descriptor for fraction consisting of two 32 bit integers
*/
typedef struct {
uint32_t num; /**< numerator */
uint32_t den; /**< denominator, needed for modulo operation */
uint32_t frac; /**< fraction */
uint8_t shift; /**< exponent */
} frac_t;
Expand All @@ -55,7 +62,7 @@ typedef struct {
* This function computes the mathematical parameters used by the frac algorithm.
*
* @note Be extra careful if @p num > @p den, the result from @ref frac_scale
* may not fit in a 64 bit integer if @c x is very big.
* may not fit in a 32 bit integer if @c x is big.
*
* @pre @p den must not be 0
*
Expand All @@ -66,17 +73,21 @@ typedef struct {
void frac_init(frac_t *frac, uint32_t num, uint32_t den);

/**
* @brief Scale a 64 bit integer by a 32/32 integer fraction
* @brief Scale a 32 bit integer by a 32/32 rational number
*
* @pre x * frac < 2**64, i.e. the result fits in a 64 bit integer
* @pre x * frac < 2**32, i.e. the result fits in a 32 bit integer
*
* @param[in] frac scaling fraction
* @param[in] x unscaled integer
*
* @return x * frac, avoiding truncation
* @return a wrong result if x * frac > 2**64
* @return a wrong result if x * frac > 2**32
*/
uint32_t frac_scale(const frac_t *frac, uint32_t x);
static inline uint32_t frac_scale(const frac_t *frac, uint32_t x)
{
uint32_t scaled = ((uint64_t)frac->frac * x) >> frac->shift;
return scaled;
}

#ifdef __cplusplus
}
Expand Down
34 changes: 4 additions & 30 deletions tests/bench_frac_div/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ static uint64_t buf[TEST_NUMOF];

#define ARRAY_LEN(arr) (sizeof(arr) / sizeof((arr)[0]))

/* Apply div_u64_by_15625div512 on all elements of buf */
uint32_t bench_div_u64_by_15625div512(uint64_t *buf, size_t nelem)
/* Apply div_u32_by_15625div512 on all elements of buf */
uint32_t bench_div_u32_by_15625div512(uint64_t *buf, size_t nelem)
{
unsigned time_start = timer_read(TIM_REF_DEV);
for (unsigned k = 0; k < nelem; ++k) {
buf[k] = div_u64_by_15625div512(buf[k]);
buf[k] = div_u32_by_15625div512(buf[k]);
}
unsigned time_end = timer_read(TIM_REF_DEV);
return (time_end - time_start);
Expand Down Expand Up @@ -136,38 +136,12 @@ int main(void)
puts("Error initializing timer!");
while(1) {}
}
for (unsigned k = 0; k <= 12; ++k) {
for (unsigned j = 0; j <= 12; ++j) {
for (unsigned den = 1; den <= 12; ++den) {
int prec = 0;
uint32_t rem = 0;
uint64_t q = frac_long_divide(k * j, den, &rem, &prec);
uint64_t i = 0;
uint64_t d = 0;
if (prec > 0) {
i = (q >> (64 - prec));
d = (q << prec);
}
else if (prec == 0) {
d = q;
}
else {
d = (q >> -prec);
}
unsigned shift = 32 - prec;
uint32_t s = (q >> 32);
uint32_t r = ((uint64_t)den * s) >> shift;
printf("(%2u x %2u) / %2u = %3" PRIu64 ".%016" PRIx64 " (0x%016" PRIx64 ", %d, %" PRIx32 "), %2" PRIu32 "\n", k, j, den, i, d, q, prec, rem, r);
}
}
}
//~ while(1) {}
uint64_t seed = 12345;
uint32_t variation = 4321;
while (1) {
++seed;
fill_buf(buf, ARRAY_LEN(buf), seed);
uint32_t time_div = bench_div_u64_by_15625div512(buf, ARRAY_LEN(buf));
uint32_t time_div = bench_div_u32_by_15625div512(buf, ARRAY_LEN(buf));
fill_buf(buf, ARRAY_LEN(buf), seed);
uint32_t time_frac = bench_frac(buf, ARRAY_LEN(buf), 512, 15625);
fill_buf(buf, ARRAY_LEN(buf), seed);
Expand Down
160 changes: 74 additions & 86 deletions tests/unittests/tests-frac/tests-frac.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,39 +17,49 @@
#include "debug.h"

static const uint32_t u32_fraction_operands[] = {
1000000l,
1l,
2l,
5l,
10l,
100l,
1000l,
1000000l,
2000000l,
4000000l,
8000000l,
16000000l,
32000000l,
32768l,
9600l,
38400l,
115200l,
230400l,
460800l,
921600l,
4096l,
15625l,
1048576l,
0x10000000l,
0x1000000l,
1000000000l,
999999733l, /* <- prime */
512000000l,
1024000000l,
0x40000000l,
1ul,
2ul,
5ul,
10ul,
100ul,
1000ul,
1000000ul,
2000000ul,
4000000ul,
8000000ul,
16000000ul,
32000000ul,
641ul,
274177ul,
32768ul,
9600ul,
38400ul,
115200ul,
230400ul,
460800ul,
921600ul,
4096ul,
15625ul,
1048576ul,
0x10000000ul,
0x1000000ul,
1000000000ul,
999999733ul, /* <- prime */
512000000ul,
1024000000ul,
0x40000000ul,
0x80000000ul,
0xc0000000ul,
0xe0000000ul,
0xf0000000ul,
0xfffffff0ul,
0xfffffff8ul,
0xfffffffcul,
0xfffffffeul,
0xfffffffful,
};

static const uint64_t u64_test_values[] = {
static const uint32_t u32_test_values[] = {
0ul,
1ul,
10ul,
Expand All @@ -74,75 +84,53 @@ static const uint64_t u64_test_values[] = {
100000000ul,
2100012683ul, /* <- prime */
0x7ffffffful,
#if 0
11111111111ull,
0x100000000ull,
16383999997ull,
16383999998ull,
16383999999ull,
16384000000ull,
1048575999807ull,
1048575999808ull,
1048575999809ull,
0xabcdef01ull,
0xabcdef012ull,
0xabcdef0123ull,
0xabcdef01234ull,
0xabcdef012345ull,
0xabcdef0123456ull,
0xabcdef01234567ull,
0x111111111111111ull,
0xffffffffffffeeull,
0x777777777777777ull,
0x1111111111111111ull,
0x7fffffffffffffffull,
0x8000000000000000ull,
#endif
0x80000000ul,
0xc0000000ul,
0xe0000000ul,
0xf0000000ul,
0xfffffff0ul,
0xfffffff8ul,
0xfffffffcul,
0xfffffffeul,
0xfffffffful,
};

#define N_U32_OPERANDS (sizeof(u32_fraction_operands) / sizeof(u32_fraction_operands[0]))
#define N_U64_VALS (sizeof(u64_test_values) / sizeof(u64_test_values[0]))
#define N_U32_VALS (sizeof(u32_test_values) / sizeof(u32_test_values[0]))

static void test_frac_scale32(void)
{
for (unsigned k = 0; k < N_U32_OPERANDS; ++k) {
for (unsigned j = 0; j < N_U32_OPERANDS; ++j) {
int32_t num = u32_fraction_operands[j];
int32_t den = u32_fraction_operands[k];
uint32_t num = u32_fraction_operands[j];
uint32_t den = u32_fraction_operands[k];
frac_t frac;
frac_init(&frac, num, den);
for (unsigned i = 0; i < N_U64_VALS; i++) {
DEBUG("Scaling %" PRIu64 " by (%" PRIu32 " / %" PRIu32 "), ",
u64_test_values[i], num, den);
/* Using 128 bit intermediate result by reusing libdivide functions.
* For a more decoupled verification, this should use
* 128 bit integers directly, but that is only supported by GCC
* on 64 bit platforms, which would prevent us from running this
* test on actual MCUs. We assume that
* libdivide__mullhi_u64,
* libdivide_128_div_64_to_64
* have been verified by other tests in the upstream test suite.
*/
/* high 64 bits of intermediate result */
uint64_t hi = libdivide__mullhi_u64(num, u64_test_values[i]);
/* low 64 bits of intermediate result */
uint64_t lo = u64_test_values[i] * num;
/* remainder from division, will be set to (uint64_t) -1 if the
* result does not fit */
uint64_t rem = 0;
/* compute 128/64 -> 64 division */
uint64_t expected = libdivide_128_div_64_to_64(hi, lo, den, &rem);
for (unsigned i = 0; i < N_U32_VALS; i++) {
DEBUG("Scaling %" PRIu32 " by (%" PRIu32 " / %" PRIu32 "), ",
u32_test_values[i], num, den);
/* intermediate result */
volatile uint64_t tmp = (uint64_t)u32_test_values[i] * num;
volatile uint64_t expected = tmp / (uint64_t)den;
if (expected > 0xfffffffful) {
/* result will not fit */
continue;
}
if (rem == (uint64_t) -1) {
/* Expected value does not fit in a 64 bit unsigned integer */
DEBUG("overflow, skipping\n");
continue;
}
uint64_t actual = frac_scale(&frac, u64_test_values[i]);
uint32_t actual = frac_scale(&frac, u32_test_values[i]);
if ((uint32_t)expected != actual) {
printf("expect %" PRIu64 ", actual %" PRIu64 "\n", expected, actual);
int32_t diff = actual - expected;
DEBUG("%" PRIu32 " * (%" PRIu32 " / %" PRIu32 ")"
" tmp %" PRIu64 " expect %" PRIu32 ", actual %" PRIu32 ", diff = %" PRId32 "\n",
u32_test_values[i], num, den, tmp, (uint32_t)expected,
actual, diff);
if (tmp >= (1ul << 31)) {
/* The frac algorithm sacrifices accuracy for speed,
* some large numbers will be incorrectly rounded,
* resulting in small differences here.. */
if (diff < 3) {
continue;
}
}
}
TEST_ASSERT_EQUAL_INT((uint32_t)expected, (uint32_t)actual);
}
Expand Down

0 comments on commit eac1520

Please sign in to comment.