Skip to content

Commit

Permalink
term hashmap remove copy in is_empty, unused unordered_id (#2229)
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Apr 10, 2024
1 parent 61ebf13 commit cc248b1
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 32 deletions.
12 changes: 6 additions & 6 deletions columnar/src/columnar/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ impl ColumnarWriter {
let mut columns: Vec<(&[u8], ColumnType, Addr)> = self
.numerical_field_hash_map
.iter()
.map(|(column_name, addr, _)| {
.map(|(column_name, addr)| {
let numerical_column_writer: NumericalColumnWriter =
self.numerical_field_hash_map.read(addr);
let column_type = numerical_column_writer.numerical_type().into();
Expand All @@ -348,27 +348,27 @@ impl ColumnarWriter {
columns.extend(
self.bytes_field_hash_map
.iter()
.map(|(term, addr, _)| (term, ColumnType::Bytes, addr)),
.map(|(term, addr)| (term, ColumnType::Bytes, addr)),
);
columns.extend(
self.str_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnType::Str, addr)),
.map(|(column_name, addr)| (column_name, ColumnType::Str, addr)),
);
columns.extend(
self.bool_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnType::Bool, addr)),
.map(|(column_name, addr)| (column_name, ColumnType::Bool, addr)),
);
columns.extend(
self.ip_addr_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnType::IpAddr, addr)),
.map(|(column_name, addr)| (column_name, ColumnType::IpAddr, addr)),
);
columns.extend(
self.datetime_field_hash_map
.iter()
.map(|(column_name, addr, _)| (column_name, ColumnType::DateTime, addr)),
.map(|(column_name, addr)| (column_name, ColumnType::DateTime, addr)),
);
columns.sort_unstable_by_key(|(column_name, col_type, _)| (*column_name, *col_type));

Expand Down
6 changes: 3 additions & 3 deletions src/indexer/segment_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,9 +510,9 @@ mod tests {

#[test]
fn test_hashmap_size() {
assert_eq!(compute_initial_table_size(100_000).unwrap(), 1 << 11);
assert_eq!(compute_initial_table_size(1_000_000).unwrap(), 1 << 14);
assert_eq!(compute_initial_table_size(15_000_000).unwrap(), 1 << 18);
assert_eq!(compute_initial_table_size(100_000).unwrap(), 1 << 12);
assert_eq!(compute_initial_table_size(1_000_000).unwrap(), 1 << 15);
assert_eq!(compute_initial_table_size(15_000_000).unwrap(), 1 << 19);
assert_eq!(compute_initial_table_size(1_000_000_000).unwrap(), 1 << 19);
assert_eq!(compute_initial_table_size(4_000_000_000).unwrap(), 1 << 19);
}
Expand Down
2 changes: 1 addition & 1 deletion src/postings/postings_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub(crate) fn serialize_postings(
term_offsets.extend(
ctx.term_index
.iter()
.map(|(bytes, addr, _unordered_id)| (Term::wrap(bytes), addr)),
.map(|(bytes, addr)| (Term::wrap(bytes), addr)),
);
term_offsets.sort_unstable_by_key(|(k, _)| k.clone());

Expand Down
33 changes: 11 additions & 22 deletions stacker/src/arena_hashmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::mem;
use super::{Addr, MemoryArena};
use crate::fastcpy::fast_short_slice_copy;
use crate::memory_arena::store;
use crate::UnorderedId;

/// Returns the actual memory size in bytes
/// required to create a table with a given capacity.
Expand All @@ -26,22 +25,20 @@ type HashType = u64;
struct KeyValue {
key_value_addr: Addr,
hash: HashType,
unordered_id: UnorderedId,
}

impl Default for KeyValue {
fn default() -> Self {
KeyValue {
key_value_addr: Addr::null_pointer(),
hash: 0,
unordered_id: UnorderedId::default(),
}
}
}

impl KeyValue {
#[inline]
fn is_empty(self) -> bool {
fn is_empty(&self) -> bool {
self.key_value_addr.is_null()
}
#[inline]
Expand Down Expand Up @@ -96,12 +93,12 @@ pub struct Iter<'a> {
}

impl<'a> Iterator for Iter<'a> {
type Item = (&'a [u8], Addr, UnorderedId);
type Item = (&'a [u8], Addr);

fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(move |kv| {
let (key, offset): (&'a [u8], Addr) = self.hashmap.get_key_value(kv.key_value_addr);
(key, offset, kv.unordered_id)
(key, offset)
})
}
}
Expand Down Expand Up @@ -207,16 +204,13 @@ impl ArenaHashMap {
}

#[inline]
fn set_bucket(&mut self, hash: HashType, key_value_addr: Addr, bucket: usize) -> UnorderedId {
let unordered_id = self.len as UnorderedId;
fn set_bucket(&mut self, hash: HashType, key_value_addr: Addr, bucket: usize) {
self.len += 1;

self.table[bucket] = KeyValue {
key_value_addr,
hash,
unordered_id,
};
unordered_id
}

#[inline]
Expand Down Expand Up @@ -290,14 +284,8 @@ impl ArenaHashMap {
/// If the key already has an associated value, then it will be passed
/// `Some(previous_value)`.
#[inline]
pub fn mutate_or_create<V>(
&mut self,
key: &[u8],
mut updater: impl FnMut(Option<V>) -> V,
) -> UnorderedId
where
V: Copy + 'static,
{
pub fn mutate_or_create<V>(&mut self, key: &[u8], mut updater: impl FnMut(Option<V>) -> V)
where V: Copy + 'static {
if self.is_saturated() {
self.resize();
}
Expand All @@ -320,14 +308,15 @@ impl ArenaHashMap {
store(&mut data[stop..], val);
}

return self.set_bucket(hash, key_addr, bucket);
self.set_bucket(hash, key_addr, bucket);
return;
}
if kv.hash == hash {
if let Some(val_addr) = self.get_value_addr_if_key_match(key, kv.key_value_addr) {
let v = self.memory_arena.read(val_addr);
let new_v = updater(Some(v));
self.memory_arena.write_at(val_addr, new_v);
return kv.unordered_id;
return;
}
}
// This allows fetching the next bucket before the loop jmp
Expand Down Expand Up @@ -361,7 +350,7 @@ mod tests {
});
let mut vanilla_hash_map = HashMap::new();
let iter_values = hash_map.iter();
for (key, addr, _) in iter_values {
for (key, addr) in iter_values {
let val: u32 = hash_map.memory_arena.read(addr);
vanilla_hash_map.insert(key.to_owned(), val);
}
Expand Down Expand Up @@ -390,7 +379,7 @@ mod tests {
}
let mut terms_back: Vec<String> = hash_map
.iter()
.map(|(bytes, _, _)| String::from_utf8(bytes.to_vec()).unwrap())
.map(|(bytes, _)| String::from_utf8(bytes.to_vec()).unwrap())
.collect();
terms_back.sort();
terms.sort();
Expand Down

0 comments on commit cc248b1

Please sign in to comment.