Rollup merge of rust-lang#54058 - Kerollmops:slice-dedup, r=shepmaster
Introduce the partition_dedup/by/by_key methods for slices

This PR proposes adding three methods to the slice type: `partition_dedup`, `partition_dedup_by` and `partition_dedup_by_key`. The other two methods are implemented in terms of `slice::partition_dedup_by`.

These methods take a mutable slice, deduplicate it in place, and move all duplicates to the end of it, returning two mutable slices: the first contains the deduplicated elements and the second contains all the duplicates, in no specified order.

```rust
let mut slice = [1, 2, 2, 3, 3, 2];

let (dedup, duplicates) = slice.partition_dedup();

assert_eq!(dedup, [1, 2, 3, 2]);
assert_eq!(duplicates, [3, 2]);
```

The benefit of adding these methods is that it is now possible to:
  - deduplicate a slice without having to allocate or clone elements on the heap, which is useful, for example, in embedded code that cannot allocate (see the sketch after this list).
  - keep the duplicate elements instead of losing them: with `Vec::dedup`, duplicate elements are dropped. These methods give the user more flexibility.
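For example, here is a minimal sketch of the no-allocation use case (the `dedup_in_place` helper and the sample data are purely illustrative, not part of this PR; it needs a nightly compiler with the feature enabled, like the doc examples below):

```rust
#![feature(slice_partition_dedup)]

// Hypothetical helper: sorts the buffer so that equal elements become consecutive,
// deduplicates it in place, and returns (number of unique elements, number of duplicates).
// No heap allocation is involved; everything happens inside the borrowed buffer.
fn dedup_in_place(buffer: &mut [u32]) -> (usize, usize) {
    buffer.sort_unstable();
    let (unique, duplicates) = buffer.partition_dedup();
    (unique.len(), duplicates.len())
}

fn main() {
    let mut readings = [3, 1, 3, 2, 1, 3]; // lives entirely on the stack
    let (unique, duplicates) = dedup_in_place(&mut readings);

    assert_eq!((unique, duplicates), (3, 3));
    assert_eq!(readings[..unique], [1, 2, 3]); // duplicates are still present in the tail
}
```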

Note that this is nearly a copy/paste of the `Vec::dedup_by` function; once this method is stable, the goal is to replace the algorithm in `Vec` with the following.

```rust
impl<T> Vec<T> {
    pub fn dedup_by<F>(&mut self, same_bucket: F)
        where F: FnMut(&mut T, &mut T) -> bool
    {
        let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
        let len = dedup.len();
        self.truncate(len);
    }
}
```
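As a quick sanity check (a sketch only; it assumes the replacement above and reuses the example from the `Vec::dedup_by` documentation), the observable behaviour stays the same:

```rust
fn main() {
    let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"];

    // Case-insensitive deduplication of consecutive elements.
    vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b));

    assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
}
```
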
pietroalbini authored Sep 25, 2018
2 parents 707c979 + d560292 commit b940e1d
Showing 5 changed files with 244 additions and 89 deletions.
1 change: 1 addition & 0 deletions src/liballoc/lib.rs
@@ -119,6 +119,7 @@
#![feature(exact_chunks)]
#![feature(rustc_const_unstable)]
#![feature(const_vec_new)]
#![feature(slice_partition_dedup)]
#![feature(maybe_uninit)]

// Allow testing this library
100 changes: 11 additions & 89 deletions src/liballoc/vec.rs
@@ -947,10 +947,9 @@ impl<T> Vec<T> {
/// Removes all but the first of consecutive elements in the vector satisfying a given equality
/// relation.
///
/// The `same_bucket` function is passed references to two elements from the vector, and
/// returns `true` if the elements compare equal, or `false` if they do not. The elements are
/// passed in opposite order from their order in the vector, so if `same_bucket(a, b)` returns
/// `true`, `a` is removed.
/// The `same_bucket` function is passed references to two elements from the vector and
/// must determine if the elements compare equal. The elements are passed in opposite order
/// from their order in the slice, so if `same_bucket(a, b)` returns `true`, `a` is removed.
///
/// If the vector is sorted, this removes all duplicates.
///
@@ -964,90 +963,12 @@ impl<T> Vec<T> {
/// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
/// ```
#[stable(feature = "dedup_by", since = "1.16.0")]
pub fn dedup_by<F>(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool {
unsafe {
// Although we have a mutable reference to `self`, we cannot make
// *arbitrary* changes. The `same_bucket` calls could panic, so we
// must ensure that the vector is in a valid state at all time.
//
// The way that we handle this is by using swaps; we iterate
// over all the elements, swapping as we go so that at the end
// the elements we wish to keep are in the front, and those we
// wish to reject are at the back. We can then truncate the
// vector. This operation is still O(n).
//
// Example: We start in this state, where `r` represents "next
// read" and `w` represents "next_write`.
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate, so
// we swap self[r] and self[w] (no effect as r==w) and then increment both
// r and w, leaving us with:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this value is a duplicate,
// so we increment `r` but leave everything else unchanged:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate,
// so swap self[r] and self[w] and advance r and w:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 1 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Not a duplicate, repeat:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 3 | 1 | 3 |
// +---+---+---+---+---+---+
// w
//
// Duplicate, advance r. End of vec. Truncate to w.

let ln = self.len();
if ln <= 1 {
return;
}

// Avoid bounds checks by using raw pointers.
let p = self.as_mut_ptr();
let mut r: usize = 1;
let mut w: usize = 1;

while r < ln {
let p_r = p.add(r);
let p_wm1 = p.add(w - 1);
if !same_bucket(&mut *p_r, &mut *p_wm1) {
if r != w {
let p_w = p_wm1.offset(1);
mem::swap(&mut *p_r, &mut *p_w);
}
w += 1;
}
r += 1;
}

self.truncate(w);
}
pub fn dedup_by<F>(&mut self, same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool {
let len = {
let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
dedup.len()
};
self.truncate(len);
}

/// Appends an element to the back of a collection.
@@ -1533,7 +1454,8 @@ impl<'a> Drop for SetLenOnDrop<'a> {
}

impl<T: PartialEq> Vec<T> {
/// Removes consecutive repeated elements in the vector.
/// Removes consecutive repeated elements in the vector according to the
/// [`PartialEq`] trait implementation.
///
/// If the vector is sorted, this removes all duplicates.
///
172 changes: 172 additions & 0 deletions src/libcore/slice/mod.rs
@@ -1402,6 +1402,178 @@ impl<T> [T] {
sort::quicksort(self, |a, b| f(a).lt(&f(b)));
}

/// Moves all consecutive repeated elements to the end of the slice according to the
/// [`PartialEq`] trait implementation.
///
/// Returns two slices. The first contains no consecutive repeated elements.
/// The second contains all the duplicates in no specified order.
///
/// If the slice is sorted, the first returned slice contains no duplicates.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_dedup)]
///
/// let mut slice = [1, 2, 2, 3, 3, 2, 1, 1];
///
/// let (dedup, duplicates) = slice.partition_dedup();
///
/// assert_eq!(dedup, [1, 2, 3, 2, 1]);
/// assert_eq!(duplicates, [2, 3, 1]);
/// ```
#[unstable(feature = "slice_partition_dedup", issue = "54279")]
#[inline]
pub fn partition_dedup(&mut self) -> (&mut [T], &mut [T])
where T: PartialEq
{
self.partition_dedup_by(|a, b| a == b)
}

    /// Moves all but the first of consecutive elements satisfying a given equality relation
    /// to the end of the slice.
///
/// Returns two slices. The first contains no consecutive repeated elements.
/// The second contains all the duplicates in no specified order.
///
/// The `same_bucket` function is passed references to two elements from the slice and
/// must determine if the elements compare equal. The elements are passed in opposite order
/// from their order in the slice, so if `same_bucket(a, b)` returns `true`, `a` is moved
    /// to the end of the slice.
///
/// If the slice is sorted, the first returned slice contains no duplicates.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_dedup)]
///
/// let mut slice = ["foo", "Foo", "BAZ", "Bar", "bar", "baz", "BAZ"];
///
/// let (dedup, duplicates) = slice.partition_dedup_by(|a, b| a.eq_ignore_ascii_case(b));
///
/// assert_eq!(dedup, ["foo", "BAZ", "Bar", "baz"]);
/// assert_eq!(duplicates, ["bar", "Foo", "BAZ"]);
/// ```
#[unstable(feature = "slice_partition_dedup", issue = "54279")]
#[inline]
pub fn partition_dedup_by<F>(&mut self, mut same_bucket: F) -> (&mut [T], &mut [T])
where F: FnMut(&mut T, &mut T) -> bool
{
// Although we have a mutable reference to `self`, we cannot make
// *arbitrary* changes. The `same_bucket` calls could panic, so we
// must ensure that the slice is in a valid state at all times.
//
// The way that we handle this is by using swaps; we iterate
// over all the elements, swapping as we go so that at the end
// the elements we wish to keep are in the front, and those we
// wish to reject are at the back. We can then split the slice.
// This operation is still O(n).
//
// Example: We start in this state, where `r` represents "next
// read" and `w` represents "next_write`.
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate, so
// we swap self[r] and self[w] (no effect as r==w) and then increment both
// r and w, leaving us with:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this value is a duplicate,
// so we increment `r` but leave everything else unchanged:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate,
// so swap self[r] and self[w] and advance r and w:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 1 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Not a duplicate, repeat:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 3 | 1 | 3 |
// +---+---+---+---+---+---+
// w
//
// Duplicate, advance r. End of slice. Split at w.

let len = self.len();
if len <= 1 {
return (self, &mut [])
}

let ptr = self.as_mut_ptr();
let mut next_read: usize = 1;
let mut next_write: usize = 1;

unsafe {
// Avoid bounds checks by using raw pointers.
while next_read < len {
let ptr_read = ptr.add(next_read);
let prev_ptr_write = ptr.add(next_write - 1);
if !same_bucket(&mut *ptr_read, &mut *prev_ptr_write) {
if next_read != next_write {
let ptr_write = prev_ptr_write.offset(1);
mem::swap(&mut *ptr_read, &mut *ptr_write);
}
next_write += 1;
}
next_read += 1;
}
}

self.split_at_mut(next_write)
}

    /// Moves all but the first of consecutive elements that resolve to the same key
    /// to the end of the slice.
///
/// Returns two slices. The first contains no consecutive repeated elements.
/// The second contains all the duplicates in no specified order.
///
/// If the slice is sorted, the first returned slice contains no duplicates.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_dedup)]
///
/// let mut slice = [10, 20, 21, 30, 30, 20, 11, 13];
///
/// let (dedup, duplicates) = slice.partition_dedup_by_key(|i| *i / 10);
///
/// assert_eq!(dedup, [10, 20, 30, 20, 11]);
/// assert_eq!(duplicates, [21, 30, 13]);
/// ```
#[unstable(feature = "slice_partition_dedup", issue = "54279")]
#[inline]
pub fn partition_dedup_by_key<K, F>(&mut self, mut key: F) -> (&mut [T], &mut [T])
where F: FnMut(&mut T) -> K,
K: PartialEq,
{
self.partition_dedup_by(|a, b| key(a) == key(b))
}

/// Rotates the slice in-place such that the first `mid` elements of the
/// slice move to the end while the last `self.len() - mid` elements move to
/// the front. After calling `rotate_left`, the element previously at index
1 change: 1 addition & 0 deletions src/libcore/tests/lib.rs
@@ -39,6 +39,7 @@
#![feature(inner_deref)]
#![feature(slice_internals)]
#![feature(option_replace)]
#![feature(slice_partition_dedup)]
#![feature(copy_within)]

extern crate core;
59 changes: 59 additions & 0 deletions src/libcore/tests/slice.rs
@@ -1001,6 +1001,65 @@ fn test_align_to_empty_mid() {
}
}

#[test]
fn test_slice_partition_dedup_by() {
let mut slice: [i32; 9] = [1, -1, 2, 3, 1, -5, 5, -2, 2];

let (dedup, duplicates) = slice.partition_dedup_by(|a, b| a.abs() == b.abs());

assert_eq!(dedup, [1, 2, 3, 1, -5, -2]);
assert_eq!(duplicates, [5, -1, 2]);
}

#[test]
fn test_slice_partition_dedup_empty() {
let mut slice: [i32; 0] = [];

let (dedup, duplicates) = slice.partition_dedup();

assert_eq!(dedup, []);
assert_eq!(duplicates, []);
}

#[test]
fn test_slice_partition_dedup_one() {
let mut slice = [12];

let (dedup, duplicates) = slice.partition_dedup();

assert_eq!(dedup, [12]);
assert_eq!(duplicates, []);
}

#[test]
fn test_slice_partition_dedup_multiple_ident() {
let mut slice = [12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11];

let (dedup, duplicates) = slice.partition_dedup();

assert_eq!(dedup, [12, 11]);
assert_eq!(duplicates, [12, 12, 12, 12, 11, 11, 11, 11, 11]);
}

#[test]
fn test_slice_partition_dedup_partialeq() {
#[derive(Debug)]
struct Foo(i32, i32);

impl PartialEq for Foo {
fn eq(&self, other: &Foo) -> bool {
self.0 == other.0
}
}

let mut slice = [Foo(0, 1), Foo(0, 5), Foo(1, 7), Foo(1, 9)];

let (dedup, duplicates) = slice.partition_dedup();

assert_eq!(dedup, [Foo(0, 1), Foo(1, 7)]);
assert_eq!(duplicates, [Foo(0, 5), Foo(1, 9)]);
}

#[test]
fn test_copy_within() {
// Start to end, with a RangeTo.
