diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs index c93a493cadb0d..8e1d374b5d42b 100644 --- a/library/alloc/benches/vec.rs +++ b/library/alloc/benches/vec.rs @@ -732,3 +732,18 @@ fn bench_flat_map_collect(b: &mut Bencher) { let v = vec![777u32; 500000]; b.iter(|| v.iter().flat_map(|color| color.rotate_left(8).to_be_bytes()).collect::>()); } + +#[bench] +fn bench_retain_100000(b: &mut Bencher) { + let v = (1..=100000).collect::>(); + b.iter(|| { + let mut v = v.clone(); + v.retain(|x| x & 1 == 0) + }); +} + +#[bench] +fn bench_retain_whole_100000(b: &mut Bencher) { + let mut v = black_box(vec![826u32; 100000]); + b.iter(|| v.retain(|x| *x == 826u32)); +} diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 87a0d37181562..6ad3bbf2f3163 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -1485,7 +1485,15 @@ impl Vec { let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len }; - while g.processed_len < original_len { + // process_one return a bool indicates whether the processing element should be retained. + #[inline(always)] + fn process_one( + f: &mut F, + g: &mut BackshiftOnDrop<'_, T, A>, + ) -> bool + where + F: FnMut(&T) -> bool, + { // SAFETY: Unchecked element must be valid. let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; if !f(cur) { @@ -1495,9 +1503,9 @@ impl Vec { // SAFETY: We never touch this element again after dropped. unsafe { ptr::drop_in_place(cur) }; // We already advanced the counter. - continue; + return false; } - if g.deleted_cnt > 0 { + if DELETED { // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. // We use copy for move, and never touch this element again. unsafe { @@ -1506,6 +1514,19 @@ impl Vec { } } g.processed_len += 1; + return true; + } + + // Stage 1: Nothing was deleted. + while g.processed_len != original_len { + if !process_one::(&mut f, &mut g) { + break; + } + } + + // Stage 2: Some elements were deleted. + while g.processed_len != original_len { + process_one::(&mut f, &mut g); } // All item are processed. This can be optimized to `set_len` by LLVM.