Skip to content

Commit

Permalink
Rollup merge of #71510 - ssomers:btreemap_iter_intertwined, r=Mark-Si…
Browse files Browse the repository at this point in the history
…mulacrum

Btreemap iter intertwined

3 commits:

1. Introduced benchmarks for `BTreeMap::iter()`. Benchmarks named `iter_20` were of the whole iteration process, so I renamed them. Also the benchmarks of `range` that I wrote earlier weren't very good. I included an (awkwardly named) one that compares `iter()` to `range(..)` on the same set, because the contrast is surprising:
```
 name                                           ns/iter
 btree::map::range_unbounded_unbounded          28,176
 btree::map::range_unbounded_vs_iter            89,369
```
Both dig up the same pair of leaf edges. `range(..)` also checks that some keys are correctly ordered, the only thing `iter()` does more is to copy the map's length.

2. Slightly refactoring the code to what I find more readable (not in chronological order of discovery), boosts performance:
```
>cargo-benchcmp.exe benchcmp a1 a2 --threshold 5
 name                                   a1 ns/iter  a2 ns/iter  diff ns/iter   diff %  speedup
 btree::map::find_rand_100              18          17                    -1   -5.56%   x 1.06
 btree::map::first_and_last_10k         64          71                     7   10.94%   x 0.90
 btree::map::iter_0                     2,939       2,209               -730  -24.84%   x 1.33
 btree::map::iter_1                     6,845       2,696             -4,149  -60.61%   x 2.54
 btree::map::iter_100                   8,556       3,672             -4,884  -57.08%   x 2.33
 btree::map::iter_10k                   9,292       5,884             -3,408  -36.68%   x 1.58
 btree::map::iter_1m                    10,268      6,510             -3,758  -36.60%   x 1.58
 btree::map::iteration_mut_100000       478,575     453,050          -25,525   -5.33%   x 1.06
 btree::map::range_unbounded_unbounded  28,176      36,169             7,993   28.37%   x 0.78
 btree::map::range_unbounded_vs_iter    89,369      38,290           -51,079  -57.16%   x 2.33
 btree::set::clone_100_and_remove_all   4,801       4,245               -556  -11.58%   x 1.13
 btree::set::clone_10k_and_remove_all   529,450     496,030          -33,420   -6.31%   x 1.07
```
But you can tell from the `range_unbounded_*` lines that, despite an unwarranted, vengeful attack on the range_unbounded_unbounded benchmark, this change still doesn't allow `iter()` to catch up with `range(..)`.

3. I guess that `range(..)` copes so well because it intertwines the leftmost and rightmost descend towards leaf edges, doing the two root node accesses close together, perhaps exploiting a CPU's internal pipelining? So the third commit distils a version of `range_search` (which we can't use directly because of the `Ord` bound), and we get another boost:
```
cargo-benchcmp.exe benchcmp a2 a3 --threshold 5
 name                                   a2 ns/iter  a3 ns/iter  diff ns/iter   diff %  speedup
 btree::map::first_and_last_100         40          43                     3    7.50%   x 0.93
 btree::map::first_and_last_10k         71          64                    -7   -9.86%   x 1.11
 btree::map::iter_0                     2,209       1,719               -490  -22.18%   x 1.29
 btree::map::iter_1                     2,696       2,205               -491  -18.21%   x 1.22
 btree::map::iter_100                   3,672       2,943               -729  -19.85%   x 1.25
 btree::map::iter_10k                   5,884       3,929             -1,955  -33.23%   x 1.50
 btree::map::iter_1m                    6,510       5,532               -978  -15.02%   x 1.18
 btree::map::iteration_mut_100000       453,050     476,667           23,617    5.21%   x 0.95
 btree::map::range_included_excluded    405,075     371,297          -33,778   -8.34%   x 1.09
 btree::map::range_included_included    427,577     397,440          -30,137   -7.05%   x 1.08
 btree::map::range_unbounded_unbounded  36,169      28,175            -7,994  -22.10%   x 1.28
 btree::map::range_unbounded_vs_iter    38,290      30,838            -7,452  -19.46%   x 1.24
```
But I think this is just fake news from the microbenchmarking media. `iter()` is still trying to catch up with `range(..)`. And we can sure do without another function. So I would skip this 3rd commit.

r? @Mark-Simulacrum
  • Loading branch information
Dylan-DPC authored May 6, 2020
2 parents 8da5869 + 8730227 commit 78a25cb
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 75 deletions.
118 changes: 72 additions & 46 deletions src/liballoc/benches/btree/map.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use std::iter::Iterator;
use std::ops::Bound::{Excluded, Unbounded};
use std::ops::RangeBounds;
use std::vec::Vec;

use rand::{seq::SliceRandom, thread_rng, Rng};
Expand Down Expand Up @@ -117,7 +117,7 @@ map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap}
map_find_seq_bench! {find_seq_100, 100, BTreeMap}
map_find_seq_bench! {find_seq_10_000, 10_000, BTreeMap}

fn bench_iter(b: &mut Bencher, size: i32) {
fn bench_iteration(b: &mut Bencher, size: i32) {
let mut map = BTreeMap::<i32, i32>::new();
let mut rng = thread_rng();

Expand All @@ -133,21 +133,21 @@ fn bench_iter(b: &mut Bencher, size: i32) {
}

#[bench]
pub fn iter_20(b: &mut Bencher) {
bench_iter(b, 20);
pub fn iteration_20(b: &mut Bencher) {
bench_iteration(b, 20);
}

#[bench]
pub fn iter_1000(b: &mut Bencher) {
bench_iter(b, 1000);
pub fn iteration_1000(b: &mut Bencher) {
bench_iteration(b, 1000);
}

#[bench]
pub fn iter_100000(b: &mut Bencher) {
bench_iter(b, 100000);
pub fn iteration_100000(b: &mut Bencher) {
bench_iteration(b, 100000);
}

fn bench_iter_mut(b: &mut Bencher, size: i32) {
fn bench_iteration_mut(b: &mut Bencher, size: i32) {
let mut map = BTreeMap::<i32, i32>::new();
let mut rng = thread_rng();

Expand All @@ -163,18 +163,18 @@ fn bench_iter_mut(b: &mut Bencher, size: i32) {
}

#[bench]
pub fn iter_mut_20(b: &mut Bencher) {
bench_iter_mut(b, 20);
pub fn iteration_mut_20(b: &mut Bencher) {
bench_iteration_mut(b, 20);
}

#[bench]
pub fn iter_mut_1000(b: &mut Bencher) {
bench_iter_mut(b, 1000);
pub fn iteration_mut_1000(b: &mut Bencher) {
bench_iteration_mut(b, 1000);
}

#[bench]
pub fn iter_mut_100000(b: &mut Bencher) {
bench_iter_mut(b, 100000);
pub fn iteration_mut_100000(b: &mut Bencher) {
bench_iteration_mut(b, 100000);
}

fn bench_first_and_last(b: &mut Bencher, size: i32) {
Expand Down Expand Up @@ -202,57 +202,83 @@ pub fn first_and_last_10k(b: &mut Bencher) {
bench_first_and_last(b, 10_000);
}

#[bench]
pub fn range_excluded_excluded(b: &mut Bencher) {
let size = 144;
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
const BENCH_RANGE_SIZE: i32 = 145;
const BENCH_RANGE_COUNT: i32 = BENCH_RANGE_SIZE * (BENCH_RANGE_SIZE - 1) / 2;

fn bench_range<F, R>(b: &mut Bencher, f: F)
where
F: Fn(i32, i32) -> R,
R: RangeBounds<i32>,
{
let map: BTreeMap<_, _> = (0..BENCH_RANGE_SIZE).map(|i| (i, i)).collect();
b.iter(|| {
for first in 0..size {
for last in first + 1..size {
black_box(map.range((Excluded(first), Excluded(last))));
let mut c = 0;
for i in 0..BENCH_RANGE_SIZE {
for j in i + 1..BENCH_RANGE_SIZE {
black_box(map.range(f(i, j)));
c += 1;
}
}
debug_assert_eq!(c, BENCH_RANGE_COUNT);
});
}

#[bench]
pub fn range_excluded_unbounded(b: &mut Bencher) {
let size = 144;
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
b.iter(|| {
for first in 0..size {
black_box(map.range((Excluded(first), Unbounded)));
}
});
pub fn range_included_excluded(b: &mut Bencher) {
bench_range(b, |i, j| i..j);
}

#[bench]
pub fn range_included_included(b: &mut Bencher) {
let size = 144;
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
b.iter(|| {
for first in 0..size {
for last in first..size {
black_box(map.range(first..=last));
}
}
});
bench_range(b, |i, j| i..=j);
}

#[bench]
pub fn range_included_unbounded(b: &mut Bencher) {
let size = 144;
bench_range(b, |i, _| i..);
}

#[bench]
pub fn range_unbounded_unbounded(b: &mut Bencher) {
bench_range(b, |_, _| ..);
}

fn bench_iter(b: &mut Bencher, repeats: i32, size: i32) {
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
b.iter(|| {
for first in 0..size {
black_box(map.range(first..));
for _ in 0..repeats {
black_box(map.iter());
}
});
}

/// Contrast range_unbounded_unbounded with `iter()`.
#[bench]
pub fn range_unbounded_unbounded(b: &mut Bencher) {
let size = 144;
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
b.iter(|| map.range(..));
pub fn range_unbounded_vs_iter(b: &mut Bencher) {
bench_iter(b, BENCH_RANGE_COUNT, BENCH_RANGE_SIZE);
}

#[bench]
pub fn iter_0(b: &mut Bencher) {
bench_iter(b, 1_000, 0);
}

#[bench]
pub fn iter_1(b: &mut Bencher) {
bench_iter(b, 1_000, 1);
}

#[bench]
pub fn iter_100(b: &mut Bencher) {
bench_iter(b, 1_000, 100);
}

#[bench]
pub fn iter_10k(b: &mut Bencher) {
bench_iter(b, 1_000, 10_000);
}

#[bench]
pub fn iter_1m(b: &mut Bencher) {
bench_iter(b, 1_000, 1_000_000);
}
76 changes: 47 additions & 29 deletions src/liballoc/collections/btree/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1540,16 +1540,10 @@ impl<K, V> IntoIterator for BTreeMap<K, V> {

fn into_iter(self) -> IntoIter<K, V> {
let mut me = ManuallyDrop::new(self);
if let Some(root) = me.root.as_mut() {
let root1 = unsafe { ptr::read(root).into_ref() };
let root2 = unsafe { ptr::read(root).into_ref() };
let len = me.length;

IntoIter {
front: Some(root1.first_leaf_edge()),
back: Some(root2.last_leaf_edge()),
length: len,
}
if let Some(root) = me.root.take() {
let (f, b) = full_range_search(root.into_ref());

IntoIter { front: Some(f), back: Some(b), length: me.length }
} else {
IntoIter { front: None, back: None, length: 0 }
}
Expand Down Expand Up @@ -2037,6 +2031,7 @@ where
}
}

/// Finds the leaf edges delimiting a specified range in or underneath a node.
fn range_search<BorrowType, K, V, Q: ?Sized, R: RangeBounds<Q>>(
root: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
range: R,
Expand Down Expand Up @@ -2122,6 +2117,33 @@ where
}
}

/// Equivalent to `range_search(k, v, ..)` without the `Ord` bound.
fn full_range_search<BorrowType, K, V>(
root: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
) -> (
Handle<NodeRef<BorrowType, K, V, marker::Leaf>, marker::Edge>,
Handle<NodeRef<BorrowType, K, V, marker::Leaf>, marker::Edge>,
) {
// We duplicate the root NodeRef here -- we will never access it in a way
// that overlaps references obtained from the root.
let mut min_node = unsafe { ptr::read(&root) };
let mut max_node = root;
loop {
let front = min_node.first_edge();
let back = max_node.last_edge();
match (front.force(), back.force()) {
(Leaf(f), Leaf(b)) => {
return (f, b);
}
(Internal(min_int), Internal(max_int)) => {
min_node = min_int.descend();
max_node = max_int.descend();
}
_ => unreachable!("BTreeMap has different depths"),
};
}
}

impl<K, V> BTreeMap<K, V> {
/// Gets an iterator over the entries of the map, sorted by key.
///
Expand All @@ -2146,12 +2168,12 @@ impl<K, V> BTreeMap<K, V> {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn iter(&self) -> Iter<'_, K, V> {
Iter {
range: Range {
front: self.root.as_ref().map(|r| r.as_ref().first_leaf_edge()),
back: self.root.as_ref().map(|r| r.as_ref().last_leaf_edge()),
},
length: self.length,
if let Some(root) = &self.root {
let (f, b) = full_range_search(root.as_ref());

Iter { range: Range { front: Some(f), back: Some(b) }, length: self.length }
} else {
Iter { range: Range { front: None, back: None }, length: 0 }
}
}

Expand All @@ -2178,19 +2200,15 @@ impl<K, V> BTreeMap<K, V> {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
IterMut {
range: if let Some(root) = &mut self.root {
let root1 = root.as_mut();
let root2 = unsafe { ptr::read(&root1) };
RangeMut {
front: Some(root1.first_leaf_edge()),
back: Some(root2.last_leaf_edge()),
_marker: PhantomData,
}
} else {
RangeMut { front: None, back: None, _marker: PhantomData }
},
length: self.length,
if let Some(root) = &mut self.root {
let (f, b) = full_range_search(root.as_mut());

IterMut {
range: RangeMut { front: Some(f), back: Some(b), _marker: PhantomData },
length: self.length,
}
} else {
IterMut { range: RangeMut { front: None, back: None, _marker: PhantomData }, length: 0 }
}
}

Expand Down

0 comments on commit 78a25cb

Please sign in to comment.