From 0dd996907c34d86d8c1f85dbd1efb12c948e786a Mon Sep 17 00:00:00 2001
From: hkalbasi <hamidrezakalbasi@protonmail.com>
Date: Mon, 16 Dec 2024 22:25:51 +0330
Subject: [PATCH 1/2] Use divide and conquer in to_radix_digits

---
 benches/bigint.rs      | 10 +++++
 src/biguint/convert.rs | 92 ++++++++++++++++++++++++++++++------------
 2 files changed, 76 insertions(+), 26 deletions(-)
diff --git a/benches/bigint.rs b/benches/bigint.rs
index 22795274..9b1ae2ff 100644
--- a/benches/bigint.rs
+++ b/benches/bigint.rs
@@ -210,6 +210,16 @@ fn to_str_radix_10_2(b: &mut Bencher) {
     to_str_radix_bench(b, 10, 10009);
 }
 
+#[bench]
+fn to_str_radix_10_3(b: &mut Bencher) {
+    to_str_radix_bench(b, 10, 100009);
+}
+
+#[bench]
+fn to_str_radix_10_4(b: &mut Bencher) {
+    to_str_radix_bench(b, 10, 1000009);
+}
+
 #[bench]
 fn to_str_radix_16(b: &mut Bencher) {
     to_str_radix_bench(b, 16, 1009);
diff --git a/src/biguint/convert.rs b/src/biguint/convert.rs
index 3daf3dcb..757d932d 100644
--- a/src/biguint/convert.rs
+++ b/src/biguint/convert.rs
@@ -1,7 +1,7 @@
 // This uses stdlib features higher than the MSRV
 #![allow(clippy::manual_range_contains)] // 1.35
 
-use super::{biguint_from_vec, BigUint, ToBigUint};
+use super::{biguint_from_vec, BigUint, IntDigits, ToBigUint};
 
 use super::addition::add2;
 use super::division::{div_rem_digit, FAST_DIV_WIDE};
@@ -701,34 +701,48 @@ pub(super) fn to_radix_digits_le(u: &BigUint, radix: u32) -> Vec<u8> {
     // The threshold for this was chosen by anecdotal performance measurements to
     // approximate where this starts to make a noticeable difference.
     if digits.data.len() >= 64 {
-        let mut big_base = BigUint::from(base);
-        let mut big_power = 1usize;
-
-        // Choose a target base length near √n.
-        let target_len = digits.data.len().sqrt();
-        while big_base.data.len() < target_len {
-            big_base = &big_base * &big_base;
-            big_power *= 2;
-        }
-
-        // This outer loop will run approximately √n times.
-        while digits > big_base {
-            // This is still the dominating factor, with n digits divided by √n digits.
-            let (q, mut big_r) = digits.div_rem(&big_base);
-            digits = q;
-
-            // This inner loop now has O(√n²)=O(n) behavior altogether.
-            for _ in 0..big_power {
-                let (q, mut r) = div_rem_digit(big_r, base);
-                big_r = q;
-                for _ in 0..power {
-                    res.push((r % radix) as u8);
-                    r /= radix;
-                }
+        let mut big_bases = Vec::with_capacity(32);
+        big_bases.push((BigUint::from(base), power));
+
+        loop {
+            let (big_base, power) = big_bases.last().unwrap();
+            if big_base.data.len() > digits.data.len() / 2 + 1 {
+                break;
             }
+            let next_big_base = big_base * big_base;
+            let next_power = *power * 2;
+            big_bases.push((next_big_base, next_power));
+        }
+
+        to_radix_digits_le_divide_and_conquer(
+            digits,
+            base,
+            power,
+            &big_bases,
+            big_bases.len() - 1,
+            &mut res,
+            radix,
+        );
+        while res.last() == Some(&0) {
+            res.pop();
         }
+        return res;
     }
 
+    to_radix_digits_le_small(digits, base, power, &mut res, radix);
+
+    res
+}
+
+// Extract little-endian radix digits for small numbers
+#[inline(always)] // forced inline to get const-prop for radix=10
+fn to_radix_digits_le_small(
+    mut digits: BigUint,
+    base: u64,
+    power: usize,
+    res: &mut Vec<u8>,
+    radix: u64,
+) {
     while digits.data.len() > 1 {
         let (q, mut r) = div_rem_digit(digits, base);
         for _ in 0..power {
@@ -743,8 +757,34 @@ pub(super) fn to_radix_digits_le(u: &BigUint, radix: u32) -> Vec<u8> {
         res.push((r % radix) as u8);
         r /= radix;
     }
+}
 
-    res
+fn to_radix_digits_le_divide_and_conquer(
+    number: BigUint,
+    base: u64,
+    power: usize,
+    big_bases: &[(BigUint, usize)],
+    k: usize,
+    res: &mut Vec<u8>,
+    radix: u64,
+) {
+    let &(ref big_base, result_len) = &big_bases[k];
+    if number.data.len() < 8 {
+        let prev_res_len = res.len();
+        if !number.is_zero() {
+            to_radix_digits_le_small(number, base, power, res, radix);
+        }
+        while res.len() < prev_res_len + result_len * 2 {
+            res.push(0);
+        }
+        return;
+    }
+    // number always has two digits in the big base
+    let (digit_1, digit_2) = number.div_rem(big_base);
+    assert!(&digit_1 < big_base);
+    assert!(&digit_2 < big_base);
+    to_radix_digits_le_divide_and_conquer(digit_2, base, power, big_bases, k - 1, res, radix);
+    to_radix_digits_le_divide_and_conquer(digit_1, base, power, big_bases, k - 1, res, radix);
 }
 
 pub(super) fn to_radix_le(u: &BigUint, radix: u32) -> Vec<u8> {

From d6313e2d08b20f68bc30d2053c284e72f76f7e30 Mon Sep 17 00:00:00 2001
From: hkalbasi <hamidrezakalbasi@protonmail.com>
Date: Tue, 17 Dec 2024 00:20:39 +0330
Subject: [PATCH 2/2] Update the threshold in to_radix_digits

---
 src/biguint/convert.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/biguint/convert.rs b/src/biguint/convert.rs
index 757d932d..0aa67d31 100644
--- a/src/biguint/convert.rs
+++ b/src/biguint/convert.rs
@@ -700,7 +700,7 @@ pub(super) fn to_radix_digits_le(u: &BigUint, radix: u32) -> Vec<u8> {
     // performance. We can mitigate this by dividing into chunks of a larger base first.
     // The threshold for this was chosen by anecdotal performance measurements to
     // approximate where this starts to make a noticeable difference.
-    if digits.data.len() >= 64 {
+    if digits.data.len() >= 32 {
         let mut big_bases = Vec::with_capacity(32);
         big_bases.push((BigUint::from(base), power));