From 1f230d81969a0e61fe34c03700e932cf719d24a8 Mon Sep 17 00:00:00 2001
From: Christopher Berner
Date: Sat, 27 Jul 2024 15:09:10 -0700
Subject: [PATCH] Fix page leak when restoring a savepoint

Restoring a savepoint dropped processed transactions from the freed
tree. However, this could leak pages: if page A was allocated, then
added to the freed tree, then a savepoint was captured, and then later
A was freed and allocated again. When that savepoint was restored, A
would remain allocated but would also be removed from the freed tree,
so the page could never be reclaimed.
---
 src/transactions.rs                       | 39 ++++++++++++-
 src/tree_store/page_store/page_manager.rs | 13 +++++
 tests/integration_tests.rs                | 67 +++++++++++++++++++++++
 3 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/src/transactions.rs b/src/transactions.rs
index 2c238a1e..4080ec1d 100644
--- a/src/transactions.rs
+++ b/src/transactions.rs
@@ -692,6 +692,18 @@ impl WriteTransaction {
         assert_eq!(self.mem.get_version(), savepoint.get_version());
         self.dirty.store(true, Ordering::Release);
 
+        // Restoring a savepoint needs to accomplish the following:
+        // 1) restore the table tree. This is trivial, since we have the old root
+        // 2) update the system tree to remove invalid persistent savepoints.
+        // 3) free all pages that were allocated since the savepoint and are unreachable
+        //    from the restored table tree root. This is non-trivial, since we want to avoid walking
+        //    the entire table tree, and we cannot simply restore the old allocator state as there
+        //    could be read transactions that reference this data.
+        // 3a) First we free all allocated pages that were not allocated in the savepoint,
+        //     except those referenced by the system table
+        // 3b) We re-process the freed table referenced by the savepoint, but ignore any pages that
+        //     are already free
+
         let allocated_since_savepoint = self
             .mem
             .pages_allocated_since_raw_state(savepoint.get_regional_allocator_states());
@@ -748,10 +760,35 @@
             transaction_id: oldest_unprocessed_transaction,
             pagination_id: 0,
         };
+        let mut freed_pages = self.freed_pages.lock().unwrap();
+        let mut freed_pages_hash = HashSet::new();
+        freed_pages_hash.extend(freed_pages.iter());
         let mut to_remove = vec![];
         for entry in freed_tree.range(&(..lookup_key))? {
-            to_remove.push(entry?.key());
+            let item = entry?;
+            to_remove.push(item.key());
+            let pages: FreedPageList = item.value();
+            for i in 0..pages.len() {
+                let page = pages.get(i);
+                if self.mem.is_page_out_of_bounds(page) {
+                    // Page no longer exists because the database file shrank
+                    continue;
+                }
+                // Re-process the freed pages, but ignore any that would be double-frees
+                if self.mem.is_allocated(page)
+                    && !freed_pages_hash.contains(&page)
+                    && !referenced_by_system_tree.contains(&page)
+                {
+                    if self.mem.uncommitted(page) {
+                        self.mem.free(page);
+                    } else {
+                        freed_pages.push(page);
+                        freed_pages_hash.insert(page);
+                    }
+                }
+            }
         }
+        drop(freed_pages);
         for key in to_remove {
             freed_tree.remove(&key)?;
         }
diff --git a/src/tree_store/page_store/page_manager.rs b/src/tree_store/page_store/page_manager.rs
index cfd48feb..b670b0d7 100644
--- a/src/tree_store/page_store/page_manager.rs
+++ b/src/tree_store/page_store/page_manager.rs
@@ -341,6 +341,19 @@ impl TransactionalMemory {
         Ok(())
     }
 
+    // Returns true if the page is beyond the last region, i.e. it no longer exists
+    pub(crate) fn is_page_out_of_bounds(&self, page: PageNumber) -> bool {
+        let state = self.state.lock().unwrap();
+        page.region as usize >= state.allocators.region_allocators.len()
+    }
+
+    pub(crate) fn is_allocated(&self, page: PageNumber) -> bool {
+        let state = self.state.lock().unwrap();
+        let allocator = state.get_region(page.region);
+
+        allocator.is_allocated(page.page_index, page.page_order)
+    }
+
     pub(crate) fn mark_pages_allocated(
         &self,
         allocated_pages: impl Iterator<Item = Result<PageNumber>>,
diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs
index aeca518c..e7ca97a3 100644
--- a/tests/integration_tests.rs
+++ b/tests/integration_tests.rs
@@ -1002,6 +1002,73 @@ fn regression22() {
     assert_eq!(allocated_pages, txn.stats().unwrap().allocated_pages());
 }
 
+#[test]
+fn regression23() {
+    let tmpfile = create_tempfile();
+
+    let db = Database::create(tmpfile.path()).unwrap();
+    let txn = db.begin_write().unwrap();
+    {
+        // List the savepoints to ensure the system table is created and occupies a page
+        #[allow(unused_must_use)]
+        {
+            txn.list_persistent_savepoints().unwrap();
+        }
+        let mut table = txn.open_table(U64_TABLE).unwrap();
+        table.insert(0, 0).unwrap();
+    }
+    txn.commit().unwrap();
+
+    let txn = db.begin_write().unwrap();
+    {
+        let mut table = txn.open_table(U64_TABLE).unwrap();
+        table.remove(0).unwrap();
+    }
+    txn.commit().unwrap();
+
+    // Extra commit to finalize the cleanup of the freed pages
+    let txn = db.begin_write().unwrap();
+    txn.commit().unwrap();
+
+    let txn = db.begin_write().unwrap();
+    let allocated_pages = txn.stats().unwrap().allocated_pages();
+    {
+        let mut table = txn.open_table(U64_TABLE).unwrap();
+        table.insert(0, 0).unwrap();
+    }
+    txn.commit().unwrap();
+
+    let txn = db.begin_write().unwrap();
+    {
+        let mut table = txn.open_table(U64_TABLE).unwrap();
+        table.remove(0).unwrap();
+    }
+    txn.commit().unwrap();
+
+    let txn = db.begin_write().unwrap();
+    let savepoint = txn.ephemeral_savepoint().unwrap();
+    txn.commit().unwrap();
+
+    let txn = db.begin_write().unwrap();
+    {
+        let mut table = txn.open_table(U64_TABLE).unwrap();
+        table.insert(0, 0).unwrap();
+    }
+    txn.commit().unwrap();
+
+    let mut txn = db.begin_write().unwrap();
+    txn.restore_savepoint(&savepoint).unwrap();
+    txn.commit().unwrap();
+    drop(savepoint);
+
+    // Extra commit to finalize the cleanup of the freed pages.
+    // There was a bug where the restoration of the savepoint would leak pages
+    db.begin_write().unwrap().commit().unwrap();
+
+    let txn = db.begin_write().unwrap();
+    assert_eq!(allocated_pages, txn.stats().unwrap().allocated_pages());
+}
+
 #[test]
 fn check_integrity_clean() {
     let tmpfile = create_tempfile();
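
The failure sequence from the commit message maps directly onto redb's
public API. The sketch below condenses the regression23 test into a
standalone program for readers who want to trace the scenario; the table
name "t" and the "leak.redb" path are illustrative only, and the exact
commit cadence that reliably reproduced the leak (including the empty
commits that let freed pages get processed) is the one in regression23.

    use redb::{Database, Error, TableDefinition};

    const TABLE: TableDefinition<u64, u64> = TableDefinition::new("t");

    fn main() -> Result<(), Error> {
        let db = Database::create("leak.redb")?;

        // Allocate a page A for the table, then free it: A's number is
        // recorded in the freed tree.
        let txn = db.begin_write()?;
        txn.open_table(TABLE)?.insert(0, 0)?;
        txn.commit()?;
        let txn = db.begin_write()?;
        txn.open_table(TABLE)?.remove(0)?;
        txn.commit()?;

        // Capture a savepoint while the freed tree still references A.
        let txn = db.begin_write()?;
        let savepoint = txn.ephemeral_savepoint()?;
        txn.commit()?;

        // A is processed out of the freed tree and allocated again.
        let txn = db.begin_write()?;
        txn.open_table(TABLE)?.insert(0, 0)?;
        txn.commit()?;

        // Before this patch, restoring here removed A's freed-tree entry
        // without freeing A, so the page could never be reclaimed.
        let mut txn = db.begin_write()?;
        txn.restore_savepoint(&savepoint)?;
        txn.commit()?;
        drop(savepoint);

        // Extra commit so pending freed pages are processed, then inspect.
        db.begin_write()?.commit()?;
        let txn = db.begin_write()?;
        println!("allocated pages: {}", txn.stats()?.allocated_pages());
        Ok(())
    }

regression23 asserts precisely this invariant: the allocated page count
captured before the insert/remove/savepoint cycle must equal the count
after the restore and the final cleanup commit.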