Auto merge of #103913 - Neutron3529:patch-1, r=thomcc · rust-lang/rust@6284998

Commit

Auto merge of #103913 - Neutron3529:patch-1, r=thomcc

Improve performance of `rem_euclid()` for signed integers

such code is copy from
https://github.com/rust-lang/rust/blob/master/library/std/src/f32.rs and
https://github.com/rust-lang/rust/blob/master/library/std/src/f64.rs
using `r+rhs.abs()` is faster than calc it with an if clause. Bench result:
```
$ cargo bench
   Compiling div-euclid v0.1.0 (/me/div-euclid)
    Finished bench [optimized] target(s) in 1.01s
     Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef)

running 7 tests
test tests::it_works ... ignored
test tests::bench_aaabs     ... bench:  10,498,793 ns/iter (+/- 104,360)
test tests::bench_aadefault ... bench:  11,061,862 ns/iter (+/- 94,107)
test tests::bench_abs       ... bench:  10,477,193 ns/iter (+/- 81,942)
test tests::bench_default   ... bench:  10,622,983 ns/iter (+/- 25,119)
test tests::bench_zzabs     ... bench:  10,481,971 ns/iter (+/- 43,787)
test tests::bench_zzdefault ... bench:  11,074,976 ns/iter (+/- 29,633)

test result: ok. 0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s
```
It seems that, default `rem_euclid` triggered a branch prediction, thus `bench_default` is faster than `bench_aadefault` and `bench_aadefault`, which shuffles the order of calculations. but all of them slower than what it was in `f64`'s and `f32`'s `rem_euclid`, thus I submit this PR.

bench code:
```rust
#![feature(test)]
extern crate test;

fn rem_euclid(a:i32,rhs:i32)->i32{
    let r = a % rhs;
    if r < 0 { r + rhs.abs() } else { r }
}

#[cfg(test)]
mod tests {
    use super::*;
    use test::Bencher;
    use rand::prelude::*;
    use rand::rngs::SmallRng;
    const N:i32=1000;
    #[test]
    fn it_works() {
        let a: i32 = 7; // or any other integer type
        let b = 4;

        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();

        for i in &d {
            for j in &n {
                assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j));
            }
        }

        assert_eq!(rem_euclid(a,b), 3);
        assert_eq!(rem_euclid(-a,b), 1);
        assert_eq!(rem_euclid(a,-b), 3);
        assert_eq!(rem_euclid(-a,-b), 1);
    }

    #[bench]
    fn bench_aaabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_aadefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_abs(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_default(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_zzabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_zzdefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }
}
```

Loading branch information

bors committed Nov 12, 2022

2 parents cd12888 + d81a0e9 commit 6284998

library/core/src/num/int_macros.rs

-Original file line number
+Diff line change
@@ Expand Up / @@ -2072,11 +2072,15 @@ macro_rules! int_impl { @@
             pub const fn rem_euclid(self, rhs: Self) -> Self {
                 let r = self % rhs;
                 if r < 0 {
-                    if rhs < 0 {
-                        r - rhs
-                    } else {
-                        r + rhs
-                    }
+                    // Semantically equivalent to `if rhs < 0 { r - rhs } else { r + rhs }`.
+                    // If `rhs` is not `Self::MIN`, then `r + abs(rhs)` will not overflow
+                    // and is clearly equivalent, because `r` is negative.
+                    // Otherwise, `rhs` is `Self::MIN`, then we have
+                    // `r.wrapping_add(Self::MIN.wrapping_abs())`, which evaluates
+                    // to `r.wrapping_add(Self::MIN)`, which is equivalent to
+                    // `r - Self::MIN`, which is what we wanted (and will not overflow
+                    // for negative `r`).
+                    r.wrapping_add(rhs.wrapping_abs())
                 } else {
                     r
                 }
@@ Expand Down @@

0 comments on commit `6284998`

Please sign in to comment.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Commit

There are no files selected for viewing

0 comments on commit `6284998`

Commit

There are no files selected for viewing

0 comments on commit 6284998

0 comments on commit `6284998`