Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify the entry and insert code #126

Merged
merged 4 commits into from
Jun 9, 2020
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 67 additions & 104 deletions src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,12 +351,6 @@ fn probe_distance(mask: usize, hash: HashValue, current: usize) -> usize {
current.wrapping_sub(desired_pos(mask, hash)) & mask
}

enum Inserted<V> {
Done,
Swapped { prev_value: V },
RobinHood { probe: usize, old_pos: Pos },
}

impl<K, V, S> fmt::Debug for IndexMap<K, V, S>
where
K: fmt::Debug + Hash + Eq,
Expand Down Expand Up @@ -840,13 +834,13 @@ where
K: Hash + Eq,
S: BuildHasher,
{
// FIXME: reduce duplication (compare with insert)
fn entry_phase_1<Sz>(&mut self, key: K) -> Entry<K, V>
fn insert_phase_1<'a, Sz, A>(&'a mut self, key: K, action: A) -> A::Output
where
Sz: Size,
A: ProbeAction<'a, Sz, K, V>,
{
let hash = hash_elem_using(&self.hash_builder, &key);
self.core.entry_phase_1::<Sz>(hash, key)
self.core.insert_phase_1::<Sz, A>(hash, key, action)
}

/// Remove all key-value pairs in the map, while preserving its capacity.
Expand All @@ -865,24 +859,12 @@ where
}
}

// First phase: Look for the preferred location for key.
//
// We will know if `key` is already in the map, before we need to insert it.
// When we insert they key, it might be that we need to continue displacing
// entries (robin hood hashing), in which case Inserted::RobinHood is returned
fn insert_phase_1<Sz>(&mut self, key: K, value: V) -> Inserted<V>
where
Sz: Size,
{
let hash = hash_elem_using(&self.hash_builder, &key);
self.core.insert_phase_1::<Sz>(hash, key, value)
}

fn reserve_one(&mut self) {
if self.len() == self.capacity() {
dispatch_32_vs_64!(self.double_capacity());
}
}

fn double_capacity<Sz>(&mut self)
where
Sz: Size,
Expand All @@ -904,26 +886,7 @@ where
/// See also [`entry`](#method.entry) if you you want to insert *or* modify
/// or if you need to get the index of the corresponding key-value pair.
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
self.reserve_one();
if self.size_class_is_64bit() {
match self.insert_phase_1::<u64>(key, value) {
Inserted::Swapped { prev_value } => Some(prev_value),
Inserted::Done => None,
Inserted::RobinHood { probe, old_pos } => {
self.core.insert_phase_2::<u64>(probe, old_pos);
None
}
}
} else {
match self.insert_phase_1::<u32>(key, value) {
Inserted::Swapped { prev_value } => Some(prev_value),
Inserted::Done => None,
Inserted::RobinHood { probe, old_pos } => {
self.core.insert_phase_2::<u32>(probe, old_pos);
None
}
}
}
self.insert_full(key, value).1
}

/// Insert a key-value pair in the map, and get their index.
Expand All @@ -940,16 +903,8 @@ where
/// See also [`entry`](#method.entry) if you you want to insert *or* modify
/// or if you need to get the index of the corresponding key-value pair.
pub fn insert_full(&mut self, key: K, value: V) -> (usize, Option<V>) {
let entry = self.entry(key);
let index = entry.index();

match entry {
Entry::Occupied(mut entry) => (index, Some(entry.insert(value))),
Entry::Vacant(entry) => {
entry.insert(value);
(index, None)
}
}
self.reserve_one();
dispatch_32_vs_64!(self.insert_phase_1::<_>(key, InsertValue(value)))
}

/// Get the given key’s corresponding entry in the map for insertion and/or
Expand All @@ -958,7 +913,7 @@ where
/// Computes in **O(1)** time (amortized average).
pub fn entry(&mut self, key: K) -> Entry<K, V> {
self.reserve_one();
dispatch_32_vs_64!(self.entry_phase_1(key))
dispatch_32_vs_64!(self.insert_phase_1::<_>(key, MakeEntry))
}

/// Return an iterator over the key-value pairs of the map, in their order
Expand Down Expand Up @@ -1451,11 +1406,11 @@ impl<K, V> OrderMapCore<K, V> {
Some(self.swap_remove_found(probe, found))
}

// FIXME: reduce duplication (compare with insert)
fn entry_phase_1<Sz>(&mut self, hash: HashValue, key: K) -> Entry<K, V>
fn insert_phase_1<'a, Sz, A>(&'a mut self, hash: HashValue, key: K, action: A) -> A::Output
where
Sz: Size,
K: Eq,
A: ProbeAction<'a, Sz, K, V>,
{
let mut probe = desired_pos(self.mask, hash);
let mut dist = 0;
Expand All @@ -1467,14 +1422,14 @@ impl<K, V> OrderMapCore<K, V> {
let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe);
if their_dist < dist {
// robin hood: steal the spot if it's better for us
return Entry::Vacant(VacantEntry {
return action.steal(VacantEntry {
map: self,
hash: hash,
key: key,
probe: probe,
});
} else if entry_hash == hash && self.entries[i].key == key {
return Entry::Occupied(OccupiedEntry {
return action.hit(OccupiedEntry {
map: self,
key: key,
probe: probe,
Expand All @@ -1483,7 +1438,7 @@ impl<K, V> OrderMapCore<K, V> {
}
} else {
// empty bucket, insert here
return Entry::Vacant(VacantEntry {
return action.empty(VacantEntry {
map: self,
hash: hash,
key: key,
Expand All @@ -1494,52 +1449,6 @@ impl<K, V> OrderMapCore<K, V> {
});
}

// First phase: Look for the preferred location for key.
//
// We will know if `key` is already in the map, before we need to insert it.
// When we insert they key, it might be that we need to continue displacing
// entries (robin hood hashing), in which case Inserted::RobinHood is returned
fn insert_phase_1<Sz>(&mut self, hash: HashValue, key: K, value: V) -> Inserted<V>
where
Sz: Size,
K: Eq,
{
let mut probe = desired_pos(self.mask, hash);
let mut dist = 0;
let insert_kind;
debug_assert!(self.len() < self.raw_capacity());
probe_loop!(probe < self.indices.len(), {
let pos = &mut self.indices[probe];
if let Some((i, hash_proxy)) = pos.resolve::<Sz>() {
let entry_hash = hash_proxy.get_short_hash(&self.entries, i);
// if existing element probed less than us, swap
let their_dist = probe_distance(self.mask, entry_hash.into_hash(), probe);
if their_dist < dist {
// robin hood: steal the spot if it's better for us
let index = self.entries.len();
insert_kind = Inserted::RobinHood {
probe: probe,
old_pos: Pos::with_hash::<Sz>(index, hash),
};
break;
} else if entry_hash == hash && self.entries[i].key == key {
return Inserted::Swapped {
prev_value: replace(&mut self.entries[i].value, value),
};
}
} else {
// empty bucket, insert here
let index = self.entries.len();
*pos = Pos::with_hash::<Sz>(index, hash);
insert_kind = Inserted::Done;
break;
}
dist += 1;
});
self.entries.push(Bucket { hash, key, value });
insert_kind
}

/// phase 2 is post-insert where we forward-shift `Pos` in the indices.
fn insert_phase_2<Sz>(&mut self, mut probe: usize, mut old_pos: Pos)
where
Expand Down Expand Up @@ -1785,6 +1694,60 @@ impl<K, V> OrderMapCore<K, V> {
}
}

trait ProbeAction<'a, Sz: Size, K, V>: Sized {
type Output;
// handle an occupied spot in the map
fn hit(self, entry: OccupiedEntry<'a, K, V>) -> Self::Output;
mwillsey marked this conversation as resolved.
Show resolved Hide resolved
// handle an empty spot in the map
fn empty(self, entry: VacantEntry<'a, K, V>) -> Self::Output;
// robin hood: handle a spot that you should steal because it's better for you
fn steal(self, entry: VacantEntry<'a, K, V>) -> Self::Output;
}

struct InsertValue<V>(V);

impl<'a, Sz: Size, K, V> ProbeAction<'a, Sz, K, V> for InsertValue<V> {
type Output = (usize, Option<V>);

fn hit(self, entry: OccupiedEntry<'a, K, V>) -> Self::Output {
let old = replace(&mut entry.map.entries[entry.index].value, self.0);
(entry.index, Some(old))
}

fn empty(self, entry: VacantEntry<'a, K, V>) -> Self::Output {
mwillsey marked this conversation as resolved.
Show resolved Hide resolved
let pos = &mut entry.map.indices[entry.probe];
let index = entry.map.entries.len();
*pos = Pos::with_hash::<Sz>(index, entry.hash);
entry.map.entries.push(Bucket {
hash: entry.hash,
key: entry.key,
value: self.0,
});
(index, None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method body seems "loose" like this - could it not just call a method on the entry? I general, it's hard to follow the insert logic when it's spread out.

(This comment is rather perfectionist, and not something I wanted to make a requirement to fix before merging).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked at this -- you'd basically want VacantEntry::insert_impl without the probe loop (since we know its empty). But this needs to consume the entry's key, which makes it harder to reuse that code, so I think we'd end up writing this code separately either way. i.e. It doesn't really matter whether we do it here or there.

(And frankly, a lot of this will disappear if I land the hashbrown changes...)

}

fn steal(self, entry: VacantEntry<'a, K, V>) -> Self::Output {
let index = entry.map.entries.len();
Copy link
Member

@bluss bluss Jun 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could be just entry.insert_impl::<Sz>(self.0) right? Something like that to reduce duplication - even hit could use that(?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah good idea. Do you mean empty could use that? I was afraid that entering the probe loop another time would be costly (technically, you don't have to in the empty case), but it seems to be fine.

Copy link
Contributor Author

@mwillsey mwillsey Jun 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nvm, turns out doing it in empty isn't great, it basically removes the fast path that the old insert code had, making steal and empty the same.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we then maybe identifying the main thing that slows down entry compared with the old insert? Maybe if we updated entry's VacantEntry to skip that loop, for the non-steal case, that insert and entry would match each other in performance?

entry.insert_impl::<Sz>(self.0);
(index, None)
}
}

struct MakeEntry;

impl<'a, Sz: Size, K: 'a, V: 'a> ProbeAction<'a, Sz, K, V> for MakeEntry {
type Output = Entry<'a, K, V>;
fn hit(self, entry: OccupiedEntry<'a, K, V>) -> Self::Output {
Entry::Occupied(entry)
}
fn empty(self, entry: VacantEntry<'a, K, V>) -> Self::Output {
cuviper marked this conversation as resolved.
Show resolved Hide resolved
Entry::Vacant(entry)
}
fn steal(self, entry: VacantEntry<'a, K, V>) -> Self::Output {
Entry::Vacant(entry)
}
}

/// Find, in the indices, an entry that already exists at a known position
/// inside self.entries in the IndexMap.
///
Expand Down