Skip to content

Commit

Permalink
f128 division works!!
Browse files Browse the repository at this point in the history
  • Loading branch information
tgross35 committed Aug 4, 2024
1 parent a17b3a4 commit 3766b2d
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 75 deletions.
41 changes: 31 additions & 10 deletions src/float/div.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Relevant notation:
fixed-point number with 1 integral bit, and 31 decimal bits. A `uqN` variable of type `uM`
will have N bits of integer and M-N bits of fraction.
- `hw`: half width, i.e. for `f64` this will be a `u32`.
- `x` is the best estimate of `1/b`
# Method Overview
Expand Down Expand Up @@ -110,6 +111,7 @@ const fn get_iterations<F: Float>() -> (usize, usize) {

// If widening multiply will be efficient (uses word-sized integers), there is no reason
// to use half-sized iterations.
// TODO: use half iterations.
if 2 * size_of::<F>() <= size_of::<*const ()>() {
(0, total_iterations)
} else {
Expand Down Expand Up @@ -422,6 +424,7 @@ where
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).

// let corr_uq1_hw: HalfRep<F> = zero_hw.wrapping_sub(x_uq0_hw.widen_mul(b_uq1_hw).hi());

// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
Expand All @@ -436,6 +439,7 @@ where
// The fact corr_UQ1_hw was virtually round up (due to result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.

// x_uq0_hw = (x_uq0_hw.widen_mul(corr_uq1_hw) >> (hw - 1)).cast();

// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
Expand Down Expand Up @@ -632,18 +636,35 @@ where
F::from_repr(abs_result | quotient_sign)
}

/// Perform one iteration at any width.
///
/// Given
fn iter_once<I>(x_uq0: I, b_uq1: I) -> I
where
I: Int + HInt,
<I as HInt>::D: ops::Shr<u32, Output = <I as HInt>::D>,
{
let corr_uq1: I = I::ZERO.wrapping_sub(x_uq0.widen_mul(b_uq1).hi());
(x_uq0.widen_mul(corr_uq1) >> (I::BITS - 1)).lo()
mod implementation {
use crate::int::{DInt, HInt, Int};
use core::ops;

/// Perform one iteration at any width to approach `1/b`, given previous guess `x`. It returns
/// the next `x` as a UQ0 number.
///
/// This is the `x_{n+1} = 2*x_n - b*x_n^2` algorithm, implemented as `x_n * (2 - b*x_n)`.
pub fn iter_once<I>(x_uq0: I, b_uq1: I) -> I
where
I: Int + HInt,
<I as HInt>::D: ops::Shr<u32, Output = <I as HInt>::D>,
{
// `corr = 2 - b*x_n`
//
// This looks like `0 - b*x_n`. However, this works - in `UQ1`, `0.0 - x = 2.0 - x`.
let corr_uq1: I = I::ZERO.wrapping_sub(x_uq0.widen_mul(b_uq1).hi());

// `x_n * corr = x_n * (2 - b*x_n)`
(x_uq0.widen_mul(corr_uq1) >> (I::BITS - 1)).lo()
}
}

#[cfg(not(feature = "public-test-deps"))]
use implementation::*;

#[cfg(feature = "public-test-deps")]
pub use implementation::*;

intrinsics! {
#[avr_skip]
#[arm_aeabi_alias = __aeabi_fdiv]
Expand Down
122 changes: 57 additions & 65 deletions testcrate/tests/div_rem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,85 +147,77 @@ mod float_div {
f32, __divsf3, Single, all();
f64, __divdf3, Double, all();
}
}

#[cfg(target_arch = "arm")]
mod float_div_arm {
use super::*;

#[cfg(target_arch = "arm")]
float! {
f32, __divsf3vfp, Single, all();
f64, __divdf3vfp, Double, all();
}
}

#[cfg(not(feature = "no-f16-f128"))]
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
mod float_div_f128 {
use super::*;

#[cfg(not(feature = "no-f16-f128"))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
float! {
f128, __divtf3, Quad, not(feature = "no-sys-f128");
}

#[cfg(not(feature = "no-f16-f128"))]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
float! {
f128, __divkf3, Quad, not(feature = "no-sys-f128");
}
}

#[test]
fn problem_f128() {
use compiler_builtins::float::div::__divtf3;

let a = f128::from_bits(0x00000000000000000000000000000001);
let b = f128::from_bits(0x0001FFFFFFFFFFFFFFFFFFFFFFFFFFFF);
let res = __divtf3(a, b);
println!(
"{:#036x} / {:#036x} = {:#036x}",
a.to_bits(),
b.to_bits(),
res.to_bits()
);
// got 0x3f8f0000000000000000000000000001
// exp 0x3f8e0000000000000000000000000001
assert_eq!(res.to_bits(), 0x3F8E0000000000000000000000000001);
panic!();
}

#[test]
fn not_problem_f64() {
use compiler_builtins::float::div::__divdf3;

let a = f64::from_bits(0x0000000000000001);
let b = f64::from_bits(0x001FFFFFFFFFFFFF);
let res = __divdf3(a, b);
println!(
"{:#018x} / {:#018x} = {:#018x}",
a.to_bits(),
b.to_bits(),
res.to_bits()
);
// 0x3CA0000000000001
assert_eq!(res.to_bits(), 0x3CA0000000000001);
panic!();
}

#[test]
fn not_problem_f32() {
use compiler_builtins::float::div::__divsf3;

let a = f32::from_bits(0x00000001);
let b = f32::from_bits(0x00FFFFFF);
let res = __divsf3(a, b);
println!(
"{:#010x} / {:#010x} = {:#010x}",
a.to_bits(),
b.to_bits(),
res.to_bits()
);
// 0x33800001
assert_eq!(res.to_bits(), 0x33800001);
panic!();
}
// #[test]
// fn problem_f128() {
// use compiler_builtins::float::div::__divtf3;

// let a = f128::from_bits(0x00000000000000000000000000000001);
// let b = f128::from_bits(0x0001FFFFFFFFFFFFFFFFFFFFFFFFFFFF);
// let res = __divtf3(a, b);
// println!(
// "{:#036x} / {:#036x} = {:#036x}",
// a.to_bits(),
// b.to_bits(),
// res.to_bits()
// );
// // got 0x3f8f0000000000000000000000000001
// // exp 0x3f8e0000000000000000000000000001
// assert_eq!(res.to_bits(), 0x3F8E0000000000000000000000000001);
// panic!();
// }

// #[test]
// fn not_problem_f64() {
// use compiler_builtins::float::div::__divdf3;

// let a = f64::from_bits(0x0000000000000001);
// let b = f64::from_bits(0x001FFFFFFFFFFFFF);
// let res = __divdf3(a, b);
// println!(
// "{:#018x} / {:#018x} = {:#018x}",
// a.to_bits(),
// b.to_bits(),
// res.to_bits()
// );
// // 0x3CA0000000000001
// assert_eq!(res.to_bits(), 0x3CA0000000000001);
// panic!();
// }

// #[test]
// fn not_problem_f32() {
// use compiler_builtins::float::div::__divsf3;

// let a = f32::from_bits(0x00000001);
// let b = f32::from_bits(0x00FFFFFF);
// let res = __divsf3(a, b);
// println!(
// "{:#010x} / {:#010x} = {:#010x}",
// a.to_bits(),
// b.to_bits(),
// res.to_bits()
// );
// // 0x33800001
// assert_eq!(res.to_bits(), 0x33800001);
// panic!();
// }
14 changes: 14 additions & 0 deletions testcrate/tests/div_unit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//! Unit tests for division since they can't be in-crate.

// use compiler_builtins::float::div::iter_once;

// #[test]
// fn test_iter_once() {
// dbg!(iter_once(0u32, 0u32));
// dbg!(iter_once(1u32, 0u32));
// dbg!(iter_once(0u32, 1u32));
// dbg!(iter_once(0u32, 1u32));
// dbg!(iter_once(u32::MAX, u16::MAX as u32));
// dbg!(u32::MAX, u16::MAX);
// panic!();
// }

0 comments on commit 3766b2d

Please sign in to comment.