Skip to content

Commit

Permalink
optimize timeful table lookup in GC
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Nov 30, 2023
1 parent 9c74fed commit e9e0c3c
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions crates/re_arrow_store/src/store_gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,6 @@ impl DataStore {
/// Returns the list of `RowId`s that were purged from the store.
//
// TODO(jleibs): There are some easy optimizations here if we find GC taking too long:
// - If we stored the entity_path_hash along with timepoints in the metadata_registry we could jump
// directly to the relevant tables instead of needing to iterate over all tables.
// - If we know we are clearing almost everything, then we can batch-clear the rows from the
// tables instead of needing to iterate over every single row incrementally.
fn gc_drop_at_least_num_bytes(
Expand All @@ -224,22 +222,22 @@ impl DataStore {
// 2. Find all tables that potentially hold data associated with that `RowId`
// 3. Drop the associated row and account for the space we got back

for (row_id, (timepoint, entity_path_hash)) in &self.metadata_registry.registry {
for (&row_id, (timepoint, entity_path_hash)) in &self.metadata_registry.registry {
if num_bytes_to_drop <= 0.0 {
break;
}

if protected_rows.contains(row_id) {
if protected_rows.contains(&row_id) {
continue;
}

let mut diff: Option<StoreDiff> = None;

// find all tables that could possibly contain this `RowId`
for ((timeline, _), table) in &mut self.tables {
if let Some(time) = timepoint.get(timeline) {
for (&timeline, &time) in timepoint {
if let Some(table) = self.tables.get_mut(&(timeline, *entity_path_hash)) {
let (removed, num_bytes_removed) =
table.try_drop_row(&self.cluster_cell_cache, *row_id, time.as_i64());
table.try_drop_row(&self.cluster_cell_cache, row_id, time.as_i64());
if let Some(inner) = diff.as_mut() {
if let Some(removed) = removed {
diff = inner.union(&removed);
Expand All @@ -257,7 +255,7 @@ impl DataStore {
for table in self.timeless_tables.values_mut() {
// let deleted_comps = deleted.timeless.entry(ent_path.clone()_hash).or_default();
let (removed, num_bytes_removed) =
table.try_drop_row(&self.cluster_cell_cache, *row_id);
table.try_drop_row(&self.cluster_cell_cache, row_id);
if let Some(inner) = diff.as_mut() {
if let Some(removed) = removed {
diff = inner.union(&removed);
Expand Down

0 comments on commit e9e0c3c

Please sign in to comment.