From 3766b2d10be683339f701a3bf8b6fbd06782e95d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 3 Aug 2024 22:59:58 -0500 Subject: [PATCH] f128 division works!! --- src/float/div.rs | 41 +++++++++--- testcrate/tests/div_rem.rs | 122 +++++++++++++++++------------------- testcrate/tests/div_unit.rs | 14 +++++ 3 files changed, 102 insertions(+), 75 deletions(-) create mode 100644 testcrate/tests/div_unit.rs diff --git a/src/float/div.rs b/src/float/div.rs index c845d1fe..63bb9a63 100644 --- a/src/float/div.rs +++ b/src/float/div.rs @@ -11,6 +11,7 @@ Relevant notation: fixed-point number with 1 integral bit, and 31 decimal bits. A `uqN` variable of type `uM` will have N bits of integer and M-N bits of fraction. - `hw`: half width, i.e. for `f64` this will be a `u32`. +- `x` is the best estimate of `1/b` # Method Overview @@ -110,6 +111,7 @@ const fn get_iterations() -> (usize, usize) { // If widening multiply will be efficient (uses word-sized integers), there is no reason // to use half-sized iterations. + // TODO: use half iterations. if 2 * size_of::() <= size_of::<*const ()>() { (0, total_iterations) } else { @@ -422,6 +424,7 @@ where // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is // expected to be strictly positive because b_UQ1_hw has its highest bit set // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). + // let corr_uq1_hw: HalfRep = zero_hw.wrapping_sub(x_uq0_hw.widen_mul(b_uq1_hw).hi()); // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally @@ -436,6 +439,7 @@ where // The fact corr_UQ1_hw was virtually round up (due to result of // multiplication being **first** truncated, then negated - to improve // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. + // x_uq0_hw = (x_uq0_hw.widen_mul(corr_uq1_hw) >> (hw - 1)).cast(); // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t @@ -632,18 +636,35 @@ where F::from_repr(abs_result | quotient_sign) } -/// Perform one iteration at any width. -/// -/// Given -fn iter_once(x_uq0: I, b_uq1: I) -> I -where - I: Int + HInt, - ::D: ops::Shr::D>, -{ - let corr_uq1: I = I::ZERO.wrapping_sub(x_uq0.widen_mul(b_uq1).hi()); - (x_uq0.widen_mul(corr_uq1) >> (I::BITS - 1)).lo() +mod implementation { + use crate::int::{DInt, HInt, Int}; + use core::ops; + + /// Perform one iteration at any width to approach `1/b`, given previous guess `x`. It returns + /// the next `x` as a UQ0 number. + /// + /// This is the `x_{n+1} = 2*x_n - b*x_n^2` algorithm, implemented as `x_n * (2 - b*x_n)`. + pub fn iter_once(x_uq0: I, b_uq1: I) -> I + where + I: Int + HInt, + ::D: ops::Shr::D>, + { + // `corr = 2 - b*x_n` + // + // This looks like `0 - b*x_n`. However, this works - in `UQ1`, `0.0 - x = 2.0 - x`. + let corr_uq1: I = I::ZERO.wrapping_sub(x_uq0.widen_mul(b_uq1).hi()); + + // `x_n * corr = x_n * (2 - b*x_n)` + (x_uq0.widen_mul(corr_uq1) >> (I::BITS - 1)).lo() + } } +#[cfg(not(feature = "public-test-deps"))] +use implementation::*; + +#[cfg(feature = "public-test-deps")] +pub use implementation::*; + intrinsics! { #[avr_skip] #[arm_aeabi_alias = __aeabi_fdiv] diff --git a/testcrate/tests/div_rem.rs b/testcrate/tests/div_rem.rs index a5d73221..abf886eb 100644 --- a/testcrate/tests/div_rem.rs +++ b/testcrate/tests/div_rem.rs @@ -147,85 +147,77 @@ mod float_div { f32, __divsf3, Single, all(); f64, __divdf3, Double, all(); } -} - -#[cfg(target_arch = "arm")] -mod float_div_arm { - use super::*; + #[cfg(target_arch = "arm")] float! { f32, __divsf3vfp, Single, all(); f64, __divdf3vfp, Double, all(); } -} - -#[cfg(not(feature = "no-f16-f128"))] -#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] -mod float_div_f128 { - use super::*; + #[cfg(not(feature = "no-f16-f128"))] #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] float! { f128, __divtf3, Quad, not(feature = "no-sys-f128"); } + #[cfg(not(feature = "no-f16-f128"))] #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] float! { f128, __divkf3, Quad, not(feature = "no-sys-f128"); } } -#[test] -fn problem_f128() { - use compiler_builtins::float::div::__divtf3; - - let a = f128::from_bits(0x00000000000000000000000000000001); - let b = f128::from_bits(0x0001FFFFFFFFFFFFFFFFFFFFFFFFFFFF); - let res = __divtf3(a, b); - println!( - "{:#036x} / {:#036x} = {:#036x}", - a.to_bits(), - b.to_bits(), - res.to_bits() - ); - // got 0x3f8f0000000000000000000000000001 - // exp 0x3f8e0000000000000000000000000001 - assert_eq!(res.to_bits(), 0x3F8E0000000000000000000000000001); - panic!(); -} - -#[test] -fn not_problem_f64() { - use compiler_builtins::float::div::__divdf3; - - let a = f64::from_bits(0x0000000000000001); - let b = f64::from_bits(0x001FFFFFFFFFFFFF); - let res = __divdf3(a, b); - println!( - "{:#018x} / {:#018x} = {:#018x}", - a.to_bits(), - b.to_bits(), - res.to_bits() - ); - // 0x3CA0000000000001 - assert_eq!(res.to_bits(), 0x3CA0000000000001); - panic!(); -} - -#[test] -fn not_problem_f32() { - use compiler_builtins::float::div::__divsf3; - - let a = f32::from_bits(0x00000001); - let b = f32::from_bits(0x00FFFFFF); - let res = __divsf3(a, b); - println!( - "{:#010x} / {:#010x} = {:#010x}", - a.to_bits(), - b.to_bits(), - res.to_bits() - ); - // 0x33800001 - assert_eq!(res.to_bits(), 0x33800001); - panic!(); -} +// #[test] +// fn problem_f128() { +// use compiler_builtins::float::div::__divtf3; + +// let a = f128::from_bits(0x00000000000000000000000000000001); +// let b = f128::from_bits(0x0001FFFFFFFFFFFFFFFFFFFFFFFFFFFF); +// let res = __divtf3(a, b); +// println!( +// "{:#036x} / {:#036x} = {:#036x}", +// a.to_bits(), +// b.to_bits(), +// res.to_bits() +// ); +// // got 0x3f8f0000000000000000000000000001 +// // exp 0x3f8e0000000000000000000000000001 +// assert_eq!(res.to_bits(), 0x3F8E0000000000000000000000000001); +// panic!(); +// } + +// #[test] +// fn not_problem_f64() { +// use compiler_builtins::float::div::__divdf3; + +// let a = f64::from_bits(0x0000000000000001); +// let b = f64::from_bits(0x001FFFFFFFFFFFFF); +// let res = __divdf3(a, b); +// println!( +// "{:#018x} / {:#018x} = {:#018x}", +// a.to_bits(), +// b.to_bits(), +// res.to_bits() +// ); +// // 0x3CA0000000000001 +// assert_eq!(res.to_bits(), 0x3CA0000000000001); +// panic!(); +// } + +// #[test] +// fn not_problem_f32() { +// use compiler_builtins::float::div::__divsf3; + +// let a = f32::from_bits(0x00000001); +// let b = f32::from_bits(0x00FFFFFF); +// let res = __divsf3(a, b); +// println!( +// "{:#010x} / {:#010x} = {:#010x}", +// a.to_bits(), +// b.to_bits(), +// res.to_bits() +// ); +// // 0x33800001 +// assert_eq!(res.to_bits(), 0x33800001); +// panic!(); +// } diff --git a/testcrate/tests/div_unit.rs b/testcrate/tests/div_unit.rs new file mode 100644 index 00000000..7a22a414 --- /dev/null +++ b/testcrate/tests/div_unit.rs @@ -0,0 +1,14 @@ +//! Unit tests for division since they can't be in-crate. + +// use compiler_builtins::float::div::iter_once; + +// #[test] +// fn test_iter_once() { +// dbg!(iter_once(0u32, 0u32)); +// dbg!(iter_once(1u32, 0u32)); +// dbg!(iter_once(0u32, 1u32)); +// dbg!(iter_once(0u32, 1u32)); +// dbg!(iter_once(u32::MAX, u16::MAX as u32)); +// dbg!(u32::MAX, u16::MAX); +// panic!(); +// }