From 981d157ac6c7fdc2cce2eb67d10487ad15dbdfe6 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 29 Nov 2023 14:24:13 +0100 Subject: [PATCH] optimize timeful table lookup in GC --- crates/re_arrow_store/src/store_gc.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/re_arrow_store/src/store_gc.rs b/crates/re_arrow_store/src/store_gc.rs index 5d4dad4b3ae1..3399b5aed717 100644 --- a/crates/re_arrow_store/src/store_gc.rs +++ b/crates/re_arrow_store/src/store_gc.rs @@ -205,8 +205,6 @@ impl DataStore { /// Returns the list of `RowId`s that were purged from the store. // // TODO(jleibs): There are some easy optimizations here if we find GC taking too long: - // - If we stored the entity_path_hash along with timepoints in the metadata_registry we could jump - // directly to the relevant tables instead of needing to iterate over all tables. // - If we know we are clearing almost everything, then we can batch-clear the rows from the // the tables instead of needing to iterate over every single row incrementally. fn gc_drop_at_least_num_bytes( @@ -224,22 +222,22 @@ impl DataStore { // 2. Find all tables that potentially hold data associated with that `RowId` // 3. Drop the associated row and account for the space we got back - for (row_id, (timepoint, entity_path_hash)) in &self.metadata_registry.registry { + for (&row_id, (timepoint, entity_path_hash)) in &self.metadata_registry.registry { if num_bytes_to_drop <= 0.0 { break; } - if protected_rows.contains(row_id) { + if protected_rows.contains(&row_id) { continue; } let mut diff: Option = None; // find all tables that could possibly contain this `RowId` - for ((timeline, _), table) in &mut self.tables { - if let Some(time) = timepoint.get(timeline) { + for (&timeline, &time) in timepoint { + if let Some(table) = self.tables.get_mut(&(timeline, *entity_path_hash)) { let (removed, num_bytes_removed) = - table.try_drop_row(&self.cluster_cell_cache, *row_id, time.as_i64()); + table.try_drop_row(&self.cluster_cell_cache, row_id, time.as_i64()); if let Some(inner) = diff.as_mut() { if let Some(removed) = removed { diff = inner.union(&removed); @@ -257,7 +255,7 @@ impl DataStore { for table in self.timeless_tables.values_mut() { // let deleted_comps = deleted.timeless.entry(ent_path.clone()_hash).or_default(); let (removed, num_bytes_removed) = - table.try_drop_row(&self.cluster_cell_cache, *row_id); + table.try_drop_row(&self.cluster_cell_cache, row_id); if let Some(inner) = diff.as_mut() { if let Some(removed) = removed { diff = inner.union(&removed);