From cf0049611bd8c912928d4635d30fee953f4ac506 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Mon, 9 Aug 2021 23:24:08 +0800 Subject: [PATCH 01/42] Update the CI to run cargo test in debug mode --- .github/workflows/CI.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 89e7469a..82c27af1 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -52,9 +52,22 @@ jobs: uses: actions-rs/cargo@v1 with: command: test - args: --release + args: --features future - name: Run tests (future) + uses: actions-rs/cargo@v1 + if: ${{ matrix.rust != '1.45.2' }} + with: + command: test + args: --features future + + - name: Run tests (release, no features) + uses: actions-rs/cargo@v1 + with: + command: test + args: --release + + - name: Run tests (release, future) uses: actions-rs/cargo@v1 if: ${{ matrix.rust != '1.45.2' }} with: From 76bb8a410f523a1908bfe3ee68b8b36abed80856 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Mon, 9 Aug 2021 23:39:25 +0800 Subject: [PATCH 02/42] Support weight-based (cost-based) eviction and unbound cache - Add `total_weight` field and optional `max_weight` and `weighter` closure fields to `InnerCache`. - Rename `max_capacity` to `max_entries` and make it optional. - Update `sync` and child methods of `InnerCache` to check and update the `total_weight` value during processing inserts, updates, invalidations and evictions. --- src/common.rs | 2 + src/future/builder.rs | 29 +++--- src/future/cache.rs | 37 +++++--- src/sync.rs | 8 +- src/sync/base_cache.rs | 199 ++++++++++++++++++++++++++++++++-------- src/sync/builder.rs | 26 +++--- src/sync/cache.rs | 27 +++--- src/sync/invalidator.rs | 13 ++- src/sync/segment.rs | 28 +++--- 9 files changed, 264 insertions(+), 105 deletions(-) diff --git a/src/common.rs b/src/common.rs index 12c4793b..b7279bc0 100644 --- a/src/common.rs +++ b/src/common.rs @@ -6,6 +6,8 @@ pub(crate) mod frequency_sketch; pub(crate) mod thread_pool; pub(crate) mod unsafe_weak_pointer; +pub(crate) type Weighter = Box u64 + Send + Sync + 'static>; + pub(crate) trait AccessTime { fn last_accessed(&self) -> Option; fn set_last_accessed(&mut self, timestamp: Instant); diff --git a/src/future/builder.rs b/src/future/builder.rs index 5cbf2e68..3070bb0f 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -37,9 +37,9 @@ use std::{ /// ``` /// pub struct CacheBuilder { - max_capacity: usize, + max_entries: Option, + // max_weight: Option, initial_capacity: Option, - // num_segments: Option, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -51,13 +51,11 @@ where K: Eq + Hash + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - /// Construct a new `CacheBuilder` that will be used to build a `Cache` holding - /// up to `max_capacity` entries. - pub fn new(max_capacity: usize) -> Self { + pub(crate) fn unbound() -> Self { Self { - max_capacity, + max_entries: None, + // max_weight: None, initial_capacity: None, - // num_segments: None, time_to_live: None, time_to_idle: None, invalidator_enabled: false, @@ -65,11 +63,20 @@ where } } + /// Construct a new `CacheBuilder` that will be used to build a `Cache` holding + /// up to `max_capacity` entries. + pub fn new(max_capacity: usize) -> Self { + Self { + max_entries: Some(max_capacity), + ..Self::unbound() + } + } + /// Builds a `Cache`. 
pub fn build(self) -> Cache { let build_hasher = RandomState::default(); Cache::with_everything( - self.max_capacity, + self.max_entries, self.initial_capacity, build_hasher, self.time_to_live, @@ -84,7 +91,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { Cache::with_everything( - self.max_capacity, + self.max_entries, self.initial_capacity, hasher, self.time_to_live, @@ -151,7 +158,7 @@ mod tests { // Cache let cache = CacheBuilder::new(100).build(); - assert_eq!(cache.max_capacity(), 100); + assert_eq!(cache.max_entries(), Some(100)); assert_eq!(cache.time_to_live(), None); assert_eq!(cache.time_to_idle(), None); assert_eq!(cache.num_segments(), 1); @@ -164,7 +171,7 @@ mod tests { .time_to_idle(Duration::from_secs(15 * 60)) .build(); - assert_eq!(cache.max_capacity(), 100); + assert_eq!(cache.max_entries(), Some(100)); assert_eq!(cache.time_to_live(), Some(Duration::from_secs(45 * 60))); assert_eq!(cache.time_to_idle(), Some(Duration::from_secs(15 * 60))); assert_eq!(cache.num_segments(), 1); diff --git a/src/future/cache.rs b/src/future/cache.rs index 6947199d..956500a8 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -1,12 +1,12 @@ use super::{ value_initializer::{InitResult, ValueInitializer}, - ConcurrentCacheExt, + CacheBuilder, ConcurrentCacheExt, }; use crate::{ sync::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, - PredicateId, WriteOp, + KeyValueEntry, PredicateId, WriteOp, }, PredicateError, }; @@ -215,15 +215,19 @@ where K: Hash + Eq + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - /// Constructs a new `Cache` that will store up to the `max_capacity` entries. + /// Constructs a new `Cache` that will store up to the `max_entries`. /// /// To adjust various configuration knobs such as `initial_capacity` or /// `time_to_live`, use the [`CacheBuilder`][builder-struct]. 
/// /// [builder-struct]: ./struct.CacheBuilder.html - pub fn new(max_capacity: usize) -> Self { + pub fn new(max_entries: usize) -> Self { let build_hasher = RandomState::default(); - Self::with_everything(max_capacity, None, build_hasher, None, None, false) + Self::with_everything(Some(max_entries), None, build_hasher, None, None, false) + } + + pub fn builder() -> CacheBuilder> { + CacheBuilder::unbound() } } @@ -234,7 +238,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { pub(crate) fn with_everything( - max_capacity: usize, + max_entries: Option, initial_capacity: Option, build_hasher: S, time_to_live: Option, @@ -243,7 +247,7 @@ where ) -> Self { Self { base: BaseCache::new( - max_capacity, + max_entries, initial_capacity, build_hasher.clone(), time_to_live, @@ -336,8 +340,8 @@ where Arc: Borrow, Q: Hash + Eq + ?Sized, { - if let Some(entry) = self.base.remove(key) { - let op = WriteOp::Remove(entry); + if let Some(KeyValueEntry { key, entry }) = self.base.remove_entry(key) { + let op = WriteOp::Remove(key, entry); let hk = self.base.housekeeper.as_ref(); if Self::schedule_write_op(&self.base.write_op_ch, op, hk) .await @@ -358,8 +362,8 @@ where Arc: Borrow, Q: Hash + Eq + ?Sized, { - if let Some(entry) = self.base.remove(key) { - let op = WriteOp::Remove(entry); + if let Some(KeyValueEntry { key, entry }) = self.base.remove_entry(key) { + let op = WriteOp::Remove(key, entry); let hk = self.base.housekeeper.as_ref(); if Self::blocking_schedule_write_op(&self.base.write_op_ch, op, hk).is_err() { panic!("Failed to remove"); @@ -413,9 +417,14 @@ where self.base.invalidate_entries_if(Arc::new(predicate)) } - /// Returns the `max_capacity` of this cache. - pub fn max_capacity(&self) -> usize { - self.base.max_capacity() + /// Returns the `max_entries` of this cache. + pub fn max_entries(&self) -> Option { + self.base.max_entries() + } + + /// Returns the `max_weight` of this cache. + pub fn max_weight(&self) -> Option { + self.base.max_weight() } /// Returns the `time_to_live` of this cache. diff --git a/src/sync.rs b/src/sync.rs index 7fa63299..d63e1422 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -92,6 +92,11 @@ impl KeyHashDate { } } +pub(crate) struct KeyValueEntry { + pub(crate) key: Arc, + pub(crate) entry: Arc>, +} + // DeqNode for an access order queue. type KeyDeqNodeAo = NonNull>>; @@ -272,11 +277,12 @@ impl AccessTime for DeqNode> { } pub(crate) enum ReadOp { + // u64 is the hash of the key. 
Hit(u64, Arc>, Instant), Miss(u64), } pub(crate) enum WriteOp { Upsert(KeyHash, Arc>), - Remove(Arc>), + Remove(Arc, Arc>), } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 167fbbed..68bfc5c2 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -2,13 +2,13 @@ use super::{ deques::Deques, housekeeper::{Housekeeper, InnerSync, SyncPace}, invalidator::{GetOrRemoveEntry, InvalidationResult, Invalidator, KeyDateLite, PredicateFun}, - KeyDate, KeyHash, KeyHashDate, PredicateId, ReadOp, ValueEntry, WriteOp, + KeyDate, KeyHash, KeyHashDate, KeyValueEntry, PredicateId, ReadOp, ValueEntry, WriteOp, }; use crate::{ common::{ deque::{CacheRegion, DeqNode, Deque}, frequency_sketch::FrequencySketch, - AccessTime, + AccessTime, Weighter, }, PredicateError, }; @@ -83,7 +83,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { pub(crate) fn new( - max_capacity: usize, + max_entries: Option, initial_capacity: Option, build_hasher: S, time_to_live: Option, @@ -93,7 +93,7 @@ where let (r_snd, r_rcv) = crossbeam_channel::bounded(READ_LOG_SIZE); let (w_snd, w_rcv) = crossbeam_channel::bounded(WRITE_LOG_SIZE); let inner = Arc::new(Inner::new( - max_capacity, + max_entries, initial_capacity, build_hasher, r_rcv, @@ -161,12 +161,12 @@ where } #[inline] - pub(crate) fn remove(&self, key: &Q) -> Option>> + pub(crate) fn remove_entry(&self, key: &Q) -> Option> where Arc: Borrow, Q: Hash + Eq + ?Sized, { - self.inner.remove(key) + self.inner.remove_entry(key) } #[inline] @@ -196,8 +196,12 @@ where self.inner.register_invalidation_predicate(predicate, now) } - pub(crate) fn max_capacity(&self) -> usize { - self.inner.max_capacity() + pub(crate) fn max_entries(&self) -> Option { + self.inner.max_entries() + } + + pub(crate) fn max_weight(&self) -> Option { + self.inner.max_weight() } pub(crate) fn time_to_live(&self) -> Option { @@ -343,12 +347,37 @@ where } } +struct TotalWeight<'a, K, V> { + total: u64, + weighter: Option<&'a Weighter>, +} + +impl<'a, K, V> TotalWeight<'a, K, V> { + fn saturating_add(&mut self, key: &K, value: &V) { + if let Some(weighter) = &self.weighter { + let total = &mut self.total; + let weight = weighter(key, value); + *total = total.saturating_add(weight); + } + } + + fn saturating_sub(&mut self, key: &K, value: &V) { + if let Some(weighter) = &self.weighter { + let total = &mut self.total; + let weight = weighter(key, value); + *total = total.saturating_sub(weight); + } + } +} + type CacheStore = moka_cht::SegmentedHashMap, Arc>, S>; type CacheEntry = (Arc, Arc>); pub(crate) struct Inner { - max_capacity: usize, + max_entries: Option, + max_weight: Option, + total_weight: AtomicU64, cache: CacheStore, build_hasher: S, deques: Mutex>, @@ -358,6 +387,7 @@ pub(crate) struct Inner { time_to_live: Option, time_to_idle: Option, valid_after: AtomicU64, + weighter: Option>, invalidator_enabled: bool, invalidator: RwLock>>, has_expiration_clock: AtomicBool, @@ -374,7 +404,7 @@ where // https://rust-lang.github.io/rust-clippy/master/index.html#too_many_arguments #[allow(clippy::too_many_arguments)] fn new( - max_capacity: usize, + max_entries: Option, initial_capacity: Option, build_hasher: S, read_op_ch: Receiver>, @@ -392,11 +422,13 @@ where initial_capacity, build_hasher.clone(), ); - let skt_capacity = usize::max(max_capacity * 32, 100); + let skt_capacity = max_entries.map(|n| n * 32).unwrap_or_default().max(100); let frequency_sketch = FrequencySketch::with_capacity(skt_capacity); Self { - max_capacity, + max_entries, + max_weight: None, + total_weight: 
AtomicU64::default(), cache, build_hasher, deques: Mutex::new(Deques::default()), @@ -406,6 +438,7 @@ where time_to_live, time_to_idle, valid_after: AtomicU64::new(0), + weighter: None, invalidator_enabled, // When enabled, this field will be set later via the set_invalidator method. invalidator: RwLock::new(None), @@ -439,16 +472,22 @@ where } #[inline] - fn remove(&self, key: &Q) -> Option>> + fn remove_entry(&self, key: &Q) -> Option> where Arc: Borrow, Q: Hash + Eq + ?Sized, { - self.cache.remove(key) + self.cache + .remove_entry(key) + .map(|(key, entry)| KeyValueEntry { key, entry }) } - fn max_capacity(&self) -> usize { - self.max_capacity + fn max_entries(&self) -> Option { + self.max_entries + } + + fn max_weight(&self) -> Option { + self.max_weight } #[inline] @@ -556,6 +595,12 @@ where let mut calls = 0; let mut should_sync = true; + let current_total_weight = self.total_weight.load(Ordering::Acquire); + let mut total_weight = TotalWeight { + total: current_total_weight, + weighter: self.weighter.as_ref(), + }; + while should_sync && calls <= max_repeats { let r_len = self.read_op_ch.len(); if r_len > 0 { @@ -564,7 +609,7 @@ where let w_len = self.write_op_ch.len(); if w_len > 0 { - self.apply_writes(&mut deqs, w_len); + self.apply_writes(&mut deqs, w_len, &mut total_weight); } calls += 1; should_sync = self.read_op_ch.len() >= READ_LOG_FLUSH_POINT @@ -572,17 +617,29 @@ where } if self.has_expiry() || self.has_valid_after() { - self.evict(&mut deqs, EVICTION_BATCH_SIZE); + self.evict(&mut deqs, EVICTION_BATCH_SIZE, &mut total_weight); } if self.invalidator_enabled { if let Some(invalidator) = &*self.invalidator.read() { if !invalidator.is_empty() && !invalidator.is_task_running() { - self.invalidate_entries(invalidator, &mut deqs, INVALIDATION_BATCH_SIZE); + self.invalidate_entries( + invalidator, + &mut deqs, + INVALIDATION_BATCH_SIZE, + &mut total_weight, + ); } } } + debug_assert_eq!( + self.total_weight.load(Ordering::Acquire), + current_total_weight + ); + self.total_weight + .store(total_weight.total, Ordering::Release); + if should_sync { Some(SyncPace::Fast) } else if self.write_op_ch.len() <= WRITE_LOG_LOW_WATER_MARK { @@ -603,6 +660,18 @@ where V: Send + Sync + 'static, S: BuildHasher + Clone + Send + Sync + 'static, { + fn has_enough_capacity(&self, total_weight: &TotalWeight<'_, K, V>) -> bool { + let entries = self + .max_entries + .map(|limit| self.cache.len() <= limit) + .unwrap_or(true); + let weight = self + .max_weight + .map(|limit| total_weight.total <= limit) + .unwrap_or(true); + entries && weight + } + fn apply_reads(&self, deqs: &mut Deques, count: usize) { use ReadOp::*; let mut freq = self.frequency_sketch.write(); @@ -620,7 +689,12 @@ where } } - fn apply_writes(&self, deqs: &mut Deques, count: usize) { + fn apply_writes( + &self, + deqs: &mut Deques, + count: usize, + total_weight: &mut TotalWeight<'_, K, V>, + ) { use WriteOp::*; let freq = self.frequency_sketch.read(); let ch = &self.write_op_ch; @@ -628,8 +702,10 @@ where for _ in 0..count { match ch.try_recv() { - Ok(Upsert(kh, entry)) => self.handle_upsert(kh, entry, ts, deqs, &freq), - Ok(Remove(entry)) => Self::handle_remove(deqs, entry), + Ok(Upsert(kh, entry)) => { + self.handle_upsert(kh, entry, ts, deqs, &freq, total_weight) + } + Ok(Remove(key, entry)) => Self::handle_remove(deqs, &key, entry, total_weight), Err(_) => break, }; } @@ -642,6 +718,7 @@ where timestamp: Instant, deqs: &mut Deques, freq: &FrequencySketch, + total_weight: &mut TotalWeight<'_, K, V>, ) { const MAX_RETRY: usize = 
5; let mut tries = 0; @@ -659,9 +736,16 @@ where // The entry has been already admitted, so treat this as an update. deqs.move_to_back_ao(&entry); deqs.move_to_back_wo(&entry); - } else if self.cache.len() <= self.max_capacity { + } else if self.has_enough_capacity(total_weight) { // There are some room in the cache. Add the candidate to the deques. - self.handle_admit(kh.clone(), &entry, last_accessed, last_modified, deqs); + self.handle_admit( + kh.clone(), + &entry, + last_accessed, + last_modified, + deqs, + total_weight, + ); } else { let victim = match Self::find_cache_victim(deqs, freq) { // Found a victim. @@ -671,7 +755,14 @@ where // panicking here, admit the candidate as there might be some // room in te cache now. None => { - self.handle_admit(kh.clone(), &entry, last_accessed, last_modified, deqs); + self.handle_admit( + kh.clone(), + &entry, + last_accessed, + last_modified, + deqs, + total_weight, + ); done = true; break; } @@ -680,9 +771,10 @@ where if Self::admit(kh.hash, victim, freq) { // The candidate is admitted. Try to remove the victim from the // cache (hash map). - if let Some(vic_entry) = self.cache.remove(&victim.element.key) { + if let Some((vic_key, vic_entry)) = self.cache.remove_entry(&victim.element.key) + { // And then remove the victim from the deques. - Self::handle_remove(deqs, vic_entry); + Self::handle_remove(deqs, &vic_key, vic_entry, total_weight); } else { // Could not remove the victim from the cache. Skip this // victim node as its ValueEntry might have been @@ -702,6 +794,7 @@ where Arc::clone(&last_accessed), Arc::clone(&last_modified), deqs, + total_weight, ); } else { // The candidate is not admitted. Remove it from the cache (hash map). @@ -746,8 +839,10 @@ where raw_last_accessed: Arc, raw_last_modified: Arc, deqs: &mut Deques, + total_weight: &mut TotalWeight<'_, K, V>, ) { let key = Arc::clone(&kh.key); + total_weight.saturating_add(&key, &entry.value); deqs.push_back_ao( CacheRegion::MainProbation, KeyHashDate::new(kh, raw_last_accessed), @@ -759,9 +854,15 @@ where entry.set_is_admitted(true); } - fn handle_remove(deqs: &mut Deques, entry: Arc>) { + fn handle_remove( + deqs: &mut Deques, + key: &Arc, + entry: Arc>, + total_weight: &mut TotalWeight<'_, K, V>, + ) { if entry.is_admitted() { entry.set_is_admitted(false); + total_weight.saturating_sub(key, &entry.value); deqs.unlink_ao(&entry); Deques::unlink_wo(&mut deqs.write_order, &entry); } @@ -772,21 +873,29 @@ where ao_deq_name: &str, ao_deq: &mut Deque>, wo_deq: &mut Deque>, + key: &Arc, entry: Arc>, + total_weight: &mut TotalWeight<'_, K, V>, ) { if entry.is_admitted() { entry.set_is_admitted(false); + total_weight.saturating_sub(key, &entry.value); Deques::unlink_ao_from_deque(ao_deq_name, ao_deq, &entry); Deques::unlink_wo(wo_deq, &entry); } entry.unset_q_nodes(); } - fn evict(&self, deqs: &mut Deques, batch_size: usize) { + fn evict( + &self, + deqs: &mut Deques, + batch_size: usize, + total_weight: &mut TotalWeight<'_, K, V>, + ) { let now = self.current_time_from_expiration_clock(); if self.is_write_order_queue_enabled() { - self.remove_expired_wo(deqs, batch_size, now); + self.remove_expired_wo(deqs, batch_size, now, total_weight); } if self.time_to_idle.is_some() || self.has_valid_after() { @@ -798,7 +907,7 @@ where ); let mut rm_expired_ao = - |name, deq| self.remove_expired_ao(name, deq, wo, batch_size, now); + |name, deq| self.remove_expired_ao(name, deq, wo, batch_size, now, total_weight); rm_expired_ao("window", window); rm_expired_ao("probation", probation); @@ -814,6 
+923,7 @@ where write_order_deq: &mut Deque>, batch_size: usize, now: Instant, + total_weight: &mut TotalWeight<'_, K, V>, ) { let tti = &self.time_to_idle; let va = self.valid_after(); @@ -848,7 +958,14 @@ where .remove_if(key, |_, v| is_expired_entry_ao(tti, va, v, now)); if let Some(entry) = maybe_entry { - Self::handle_remove_with_deques(deq_name, deq, write_order_deq, entry); + Self::handle_remove_with_deques( + deq_name, + deq, + write_order_deq, + key, + entry, + total_weight, + ); } else if let Some(entry) = self.cache.get(key) { let ts = entry.last_accessed(); if ts.is_none() { @@ -873,7 +990,13 @@ where } #[inline] - fn remove_expired_wo(&self, deqs: &mut Deques, batch_size: usize, now: Instant) { + fn remove_expired_wo( + &self, + deqs: &mut Deques, + batch_size: usize, + now: Instant, + total_weight: &mut TotalWeight<'_, K, V>, + ) { let ttl = &self.time_to_live; let va = self.valid_after(); for _ in 0..batch_size { @@ -903,7 +1026,7 @@ where .remove_if(key, |_, v| is_expired_entry_wo(ttl, va, v, now)); if let Some(entry) = maybe_entry { - Self::handle_remove(deqs, entry); + Self::handle_remove(deqs, key, entry, total_weight); } else if let Some(entry) = self.cache.get(key) { let ts = entry.last_modified(); if ts.is_none() { @@ -931,8 +1054,9 @@ where invalidator: &Invalidator, deqs: &mut Deques, batch_size: usize, + total_weight: &mut TotalWeight<'_, K, V>, ) { - self.process_invalidation_result(invalidator, deqs); + self.process_invalidation_result(invalidator, deqs, total_weight); self.submit_invalidation_task(invalidator, &mut deqs.write_order, batch_size); } @@ -940,14 +1064,15 @@ where &self, invalidator: &Invalidator, deqs: &mut Deques, + total_weight: &mut TotalWeight<'_, K, V>, ) { if let Some(InvalidationResult { invalidated, is_done, }) = invalidator.task_result() { - for entry in invalidated { - Self::handle_remove(deqs, entry); + for KeyValueEntry { key, entry } in invalidated { + Self::handle_remove(deqs, &key, entry, total_weight); } if is_done { deqs.write_order.reset_cursor(); diff --git a/src/sync/builder.rs b/src/sync/builder.rs index 18082202..784a1563 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -39,7 +39,7 @@ use std::{ /// ``` /// pub struct CacheBuilder { - max_capacity: usize, + max_entries: Option, initial_capacity: Option, num_segments: Option, time_to_live: Option, @@ -54,10 +54,10 @@ where V: Clone + Send + Sync + 'static, { /// Construct a new `CacheBuilder` that will be used to build a `Cache` or - /// `SegmentedCache` holding up to `max_capacity` entries. - pub fn new(max_capacity: usize) -> Self { + /// `SegmentedCache` holding up to `max_entries`. 
+ pub fn new(max_entries: usize) -> Self { Self { - max_capacity, + max_entries: Some(max_entries), initial_capacity: None, num_segments: None, time_to_live: None, @@ -76,7 +76,7 @@ where assert!(num_segments > 1); CacheBuilder { - max_capacity: self.max_capacity, + max_entries: self.max_entries, initial_capacity: self.initial_capacity, num_segments: Some(num_segments), time_to_live: self.time_to_live, @@ -93,7 +93,7 @@ where pub fn build(self) -> Cache { let build_hasher = RandomState::default(); Cache::with_everything( - self.max_capacity, + self.max_entries, self.initial_capacity, build_hasher, self.time_to_live, @@ -111,7 +111,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { Cache::with_everything( - self.max_capacity, + self.max_entries, self.initial_capacity, hasher, self.time_to_live, @@ -133,7 +133,7 @@ where pub fn build(self) -> SegmentedCache { let build_hasher = RandomState::default(); SegmentedCache::with_everything( - self.max_capacity, + self.max_entries, self.initial_capacity, self.num_segments.unwrap(), build_hasher, @@ -152,7 +152,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { SegmentedCache::with_everything( - self.max_capacity, + self.max_entries, self.initial_capacity, self.num_segments.unwrap(), hasher, @@ -220,7 +220,7 @@ mod tests { // Cache let cache = CacheBuilder::new(100).build(); - assert_eq!(cache.max_capacity(), 100); + assert_eq!(cache.max_entries(), Some(100)); assert_eq!(cache.time_to_live(), None); assert_eq!(cache.time_to_idle(), None); assert_eq!(cache.num_segments(), 1); @@ -233,7 +233,7 @@ mod tests { .time_to_idle(Duration::from_secs(15 * 60)) .build(); - assert_eq!(cache.max_capacity(), 100); + assert_eq!(cache.max_entries(), Some(100)); assert_eq!(cache.time_to_live(), Some(Duration::from_secs(45 * 60))); assert_eq!(cache.time_to_idle(), Some(Duration::from_secs(15 * 60))); assert_eq!(cache.num_segments(), 1); @@ -247,7 +247,7 @@ mod tests { // SegmentCache let cache = CacheBuilder::new(100).segments(16).build(); - assert_eq!(cache.max_capacity(), 100); + assert_eq!(cache.max_entries(), Some(100)); assert_eq!(cache.time_to_live(), None); assert_eq!(cache.time_to_idle(), None); assert_eq!(cache.num_segments(), 16_usize.next_power_of_two()); @@ -261,7 +261,7 @@ mod tests { .time_to_idle(Duration::from_secs(15 * 60)) .build(); - assert_eq!(cache.max_capacity(), 100); + assert_eq!(cache.max_entries(), Some(100)); assert_eq!(cache.time_to_live(), Some(Duration::from_secs(45 * 60))); assert_eq!(cache.time_to_idle(), Some(Duration::from_secs(15 * 60))); assert_eq!(cache.num_segments(), 16_usize.next_power_of_two()); diff --git a/src/sync/cache.rs b/src/sync/cache.rs index 7f1a9546..68783f44 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -2,7 +2,7 @@ use super::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, value_initializer::ValueInitializer, - ConcurrentCacheExt, PredicateId, WriteOp, + ConcurrentCacheExt, KeyValueEntry, PredicateId, WriteOp, }; use crate::{sync::value_initializer::InitResult, PredicateError}; @@ -187,15 +187,15 @@ where K: Hash + Eq + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - /// Constructs a new `Cache` that will store up to the `max_capacity` entries. + /// Constructs a new `Cache` that will store up to the `max_entries`. /// /// To adjust various configuration knobs such as `initial_capacity` or /// `time_to_live`, use the [`CacheBuilder`][builder-struct]. 
/// /// [builder-struct]: ./struct.CacheBuilder.html - pub fn new(max_capacity: usize) -> Self { + pub fn new(max_entries: usize) -> Self { let build_hasher = RandomState::default(); - Self::with_everything(max_capacity, None, build_hasher, None, None, false) + Self::with_everything(Some(max_entries), None, build_hasher, None, None, false) } } @@ -206,7 +206,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { pub(crate) fn with_everything( - max_capacity: usize, + max_entries: Option, initial_capacity: Option, build_hasher: S, time_to_live: Option, @@ -215,7 +215,7 @@ where ) -> Self { Self { base: BaseCache::new( - max_capacity, + max_entries, initial_capacity, build_hasher.clone(), time_to_live, @@ -358,8 +358,8 @@ where Arc: Borrow, Q: Hash + Eq + ?Sized, { - if let Some(entry) = self.base.remove(key) { - let op = WriteOp::Remove(entry); + if let Some(KeyValueEntry { key, entry }) = self.base.remove_entry(key) { + let op = WriteOp::Remove(key, entry); let hk = self.base.housekeeper.as_ref(); Self::schedule_write_op(&self.base.write_op_ch, op, hk).expect("Failed to remove"); } @@ -421,9 +421,14 @@ where self.base.invalidate_entries_if(predicate) } - /// Returns the `max_capacity` of this cache. - pub fn max_capacity(&self) -> usize { - self.base.max_capacity() + /// Returns the `max_entries` of this cache. + pub fn max_entries(&self) -> Option { + self.base.max_entries() + } + + /// Returns the `max_weight` of this cache. + pub fn max_weight(&self) -> Option { + self.base.max_weight() } /// Returns the `time_to_live` of this cache. diff --git a/src/sync/invalidator.rs b/src/sync/invalidator.rs index 8f3985fe..cc66bb3c 100644 --- a/src/sync/invalidator.rs +++ b/src/sync/invalidator.rs @@ -9,7 +9,7 @@ use crate::{ PredicateError, }; -use super::{base_cache::Inner, PredicateId, PredicateIdStr, ValueEntry}; +use super::{base_cache::Inner, KeyValueEntry, PredicateId, PredicateIdStr, ValueEntry}; use parking_lot::{Mutex, RwLock}; use quanta::Instant; @@ -59,12 +59,12 @@ impl KeyDateLite { } pub(crate) struct InvalidationResult { - pub(crate) invalidated: Vec>>, + pub(crate) invalidated: Vec>, pub(crate) is_done: bool, } impl InvalidationResult { - fn new(invalidated: Vec>>, is_done: bool) -> Self { + fn new(invalidated: Vec>, is_done: bool) -> Self { Self { invalidated, is_done, @@ -399,7 +399,10 @@ where let ts = candidate.timestamp; if Self::apply(&predicates, cache, key, ts) { if let Some(entry) = Self::invalidate(cache, key, ts) { - invalidated.push(entry) + invalidated.push(KeyValueEntry { + key: Arc::clone(key), + entry, + }) } } newest_timestamp = Some(ts); @@ -447,7 +450,7 @@ where } struct ScanResult { - invalidated: Vec>>, + invalidated: Vec>, is_truncated: bool, newest_timestamp: Option, } diff --git a/src/sync/segment.rs b/src/sync/segment.rs index fbf4d67b..8e1b8692 100644 --- a/src/sync/segment.rs +++ b/src/sync/segment.rs @@ -58,7 +58,7 @@ where V: Clone + Send + Sync + 'static, { /// Constructs a new `SegmentedCache` that has multiple internal - /// segments and will store up to the `max_capacity` entries. + /// segments and will store up to the `max_entries`. /// /// To adjust various configuration knobs such as `initial_capacity` or /// `time_to_live`, use the [`CacheBuilder`][builder-struct]. @@ -68,10 +68,10 @@ where /// # Panics /// /// Panics if `num_segments` is 0. 
- pub fn new(max_capacity: usize, num_segments: usize) -> Self { + pub fn new(max_entries: usize, num_segments: usize) -> Self { let build_hasher = RandomState::default(); Self::with_everything( - max_capacity, + Some(max_entries), None, num_segments, build_hasher, @@ -92,7 +92,7 @@ where /// /// Panics if `num_segments` is 0. pub(crate) fn with_everything( - max_capacity: usize, + max_entries: Option, initial_capacity: Option, num_segments: usize, build_hasher: S, @@ -102,7 +102,7 @@ where ) -> Self { Self { inner: Arc::new(Inner::new( - max_capacity, + max_entries, initial_capacity, num_segments, build_hasher, @@ -241,9 +241,9 @@ where Ok(()) } - /// Returns the `max_capacity` of this cache. - pub fn max_capacity(&self) -> usize { - self.inner.desired_capacity + /// Returns the `max_entries` of this cache. + pub fn max_entries(&self) -> Option { + self.inner.desired_max_entries } /// Returns the `time_to_live` of this cache. @@ -343,7 +343,8 @@ impl MockExpirationClock { } struct Inner { - desired_capacity: usize, + desired_max_entries: Option, + // desired_max_weight: Option, segments: Box<[Cache]>, build_hasher: S, segment_shift: u32, @@ -359,7 +360,7 @@ where /// /// Panics if `num_segments` is 0. fn new( - max_capacity: usize, + max_entries: Option, initial_capacity: Option, num_segments: usize, build_hasher: S, @@ -372,14 +373,14 @@ where let actual_num_segments = num_segments.next_power_of_two(); let segment_shift = 64 - actual_num_segments.trailing_zeros(); // TODO: Round up. - let seg_capacity = max_capacity / actual_num_segments; + let seg_max_entries = max_entries.map(|n| n / actual_num_segments); let seg_init_capacity = initial_capacity.map(|cap| cap / actual_num_segments); // NOTE: We cannot initialize the segments as `vec![cache; actual_num_segments]` // because Cache::clone() does not clone its inner but shares the same inner. let segments = (0..num_segments) .map(|_| { Cache::with_everything( - seg_capacity, + seg_max_entries, seg_init_capacity, build_hasher.clone(), time_to_live, @@ -390,7 +391,8 @@ where .collect::>(); Self { - desired_capacity: max_capacity, + desired_max_entries: max_entries, + // desired_max_weight: None, segments: segments.into_boxed_slice(), build_hasher, segment_shift, From 35a789e628bc1cb46004066ceeb46ff766f4b8c6 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Mon, 9 Aug 2021 23:48:32 +0800 Subject: [PATCH 03/42] Fix the CI on MSRV 1.45.2 --- .github/workflows/CI.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 82c27af1..3002def3 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -52,7 +52,6 @@ jobs: uses: actions-rs/cargo@v1 with: command: test - args: --features future - name: Run tests (future) uses: actions-rs/cargo@v1 From aa583ad638ff85b922de781973d484ba818532de Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 10:52:32 +0800 Subject: [PATCH 04/42] Temporary disable the CI for MSRV 1.45.2 It is currently failing because bitflags v1.3.1 is incompatible with the MSRV. This issue will be addressed by another pull request. 
--- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 3002def3..406b438b 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -27,7 +27,7 @@ jobs: - stable - beta - 1.46.0 # MSRV (future) - - 1.45.2 # MSRV (no features) + # - 1.45.2 # MSRV (no features) steps: - name: Checkout Moka From 20823381d6eadd679c96e7d100605af01b8edf04 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 11:03:36 +0800 Subject: [PATCH 05/42] Remove unnecessary hash calculation from an internal method: future::Cache::get_or_try_insert_with_hash_and_fun --- src/future/cache.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/future/cache.rs b/src/future/cache.rs index 956500a8..4b914625 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -510,7 +510,6 @@ where .await { InitResult::Initialized(v) => { - let hash = self.base.hash(&key); self.insert_with_hash(Arc::clone(&key), hash, v.clone()) .await; self.value_initializer From 6a5444e95eff2ac587a89e961ef0aaddb9bffc4d Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 11:03:51 +0800 Subject: [PATCH 06/42] Add some source code comments --- src/sync.rs | 2 +- src/sync/base_cache.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/sync.rs b/src/sync.rs index d63e1422..f5f43f8e 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -143,7 +143,7 @@ impl ValueEntry { }; let last_accessed = Arc::clone(&other.last_accessed); let last_modified = Arc::clone(&other.last_modified); - // To prevent this updated ValueEntry from being evicted by a expiration policy, + // To prevent this updated ValueEntry from being evicted by an expiration policy, // set the max value to the timestamps. They will be replaced with the real // timestamps when applying writes. last_accessed.store(std::u64::MAX, Ordering::Release); diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 68bfc5c2..b4574bda 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -262,6 +262,9 @@ where }, // on_modify |_k, old_entry| { + // NOTE: `new_with` sets the max value to the last_accessed and last_modified + // to prevent this updated ValueEntry from being evicted by an expiration policy. + // See the comments in `new_with` for more details. let entry = Arc::new(ValueEntry::new_with(value.clone(), old_entry)); let cnt = op_cnt2.fetch_add(1, Ordering::Relaxed); op2 = Some(( From 2835b9451c057dc0adfabaeaebc7d21d0db37fd9 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 17:42:11 +0800 Subject: [PATCH 07/42] Support weight-based (cost-based) eviction and unbound cache - Implement aggregated victims strategy for cost-based eviction. - Rename max_entry back to max_capacity, and remove an optional max_weight field from InnerCache. 
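
For reference, a minimal, self-contained sketch of the aggregated-victims admission strategy described above, assuming each probation entry carries a sketch-estimated frequency and a weigher-reported weight. The names (`Entry`, `Admission`, `admit`) are illustrative only and are not part of Moka's API; the real implementation in the diff below additionally handles skipped (invalidated) victim nodes and the deque bookkeeping.

    // Frequency is an estimate (e.g. from the TinyLFU frequency sketch) and
    // weight is the cost reported by the weigher closure for that entry.
    struct Entry {
        freq: u32,
        weight: u64,
    }

    #[derive(Debug, PartialEq)]
    enum Admission {
        // Indices (from the front of the probation queue) of the victims to evict.
        Admitted(Vec<usize>),
        Rejected,
    }

    // Aggregate victims from the front of the probation queue until enough
    // weight would be freed for the candidate, then compare the candidate's
    // frequency against the combined frequency of those victims.
    fn admit(candidate: &Entry, probation: &[Entry]) -> Admission {
        let mut victims = Vec::new();
        let mut freed = 0u64;
        let mut victims_freq = 0u32;

        for (i, victim) in probation.iter().enumerate() {
            if freed >= candidate.weight {
                break;
            }
            if victims_freq > candidate.freq {
                // Evicting even more victims cannot make the candidate win.
                return Admission::Rejected;
            }
            victims_freq += victim.freq;
            freed += victim.weight;
            victims.push(i);
        }

        if freed >= candidate.weight && candidate.freq >= victims_freq {
            Admission::Admitted(victims)
        } else {
            Admission::Rejected
        }
    }

    fn main() {
        let probation = vec![
            Entry { freq: 1, weight: 2 },
            Entry { freq: 2, weight: 2 },
            Entry { freq: 9, weight: 2 },
        ];

        // A popular, mid-sized candidate displaces the two cold entries.
        let hot = Entry { freq: 5, weight: 4 };
        assert_eq!(admit(&hot, &probation), Admission::Admitted(vec![0, 1]));

        // A cold candidate of the same size is rejected.
        let cold = Entry { freq: 1, weight: 4 };
        assert_eq!(admit(&cold, &probation), Admission::Rejected);
    }

Compared with the previous single-victim policy, aggregating victims lets one heavy candidate displace several light, rarely used entries, which is what the rewritten `handle_upsert`/`admit` below implements.
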
--- src/common/deque.rs | 4 + src/future/builder.rs | 16 +-- src/future/cache.rs | 42 +++--- src/sync/base_cache.rs | 289 +++++++++++++++++++++++------------------ src/sync/builder.rs | 26 ++-- src/sync/cache.rs | 31 ++--- src/sync/segment.rs | 38 +++--- src/unsync/cache.rs | 11 +- 8 files changed, 258 insertions(+), 199 deletions(-) diff --git a/src/common/deque.rs b/src/common/deque.rs index 0bdb01c5..281224c1 100644 --- a/src/common/deque.rs +++ b/src/common/deque.rs @@ -49,6 +49,10 @@ impl DeqNode { element, } } + + pub(crate) fn next_node(&self) -> Option<&DeqNode> { + self.next.as_ref().map(|node| unsafe { node.as_ref() }) + } } /// Cursor is used to remember the current iterating position. diff --git a/src/future/builder.rs b/src/future/builder.rs index 3070bb0f..20653175 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -37,8 +37,7 @@ use std::{ /// ``` /// pub struct CacheBuilder { - max_entries: Option, - // max_weight: Option, + max_capacity: Option, initial_capacity: Option, time_to_live: Option, time_to_idle: Option, @@ -53,8 +52,7 @@ where { pub(crate) fn unbound() -> Self { Self { - max_entries: None, - // max_weight: None, + max_capacity: None, initial_capacity: None, time_to_live: None, time_to_idle: None, @@ -67,7 +65,7 @@ where /// up to `max_capacity` entries. pub fn new(max_capacity: usize) -> Self { Self { - max_entries: Some(max_capacity), + max_capacity: Some(max_capacity), ..Self::unbound() } } @@ -76,7 +74,7 @@ where pub fn build(self) -> Cache { let build_hasher = RandomState::default(); Cache::with_everything( - self.max_entries, + self.max_capacity, self.initial_capacity, build_hasher, self.time_to_live, @@ -91,7 +89,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { Cache::with_everything( - self.max_entries, + self.max_capacity, self.initial_capacity, hasher, self.time_to_live, @@ -158,7 +156,7 @@ mod tests { // Cache let cache = CacheBuilder::new(100).build(); - assert_eq!(cache.max_entries(), Some(100)); + assert_eq!(cache.max_capacity(), Some(100)); assert_eq!(cache.time_to_live(), None); assert_eq!(cache.time_to_idle(), None); assert_eq!(cache.num_segments(), 1); @@ -171,7 +169,7 @@ mod tests { .time_to_idle(Duration::from_secs(15 * 60)) .build(); - assert_eq!(cache.max_entries(), Some(100)); + assert_eq!(cache.max_capacity(), Some(100)); assert_eq!(cache.time_to_live(), Some(Duration::from_secs(45 * 60))); assert_eq!(cache.time_to_idle(), Some(Duration::from_secs(15 * 60))); assert_eq!(cache.num_segments(), 1); diff --git a/src/future/cache.rs b/src/future/cache.rs index 4b914625..a9bfe072 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -215,15 +215,15 @@ where K: Hash + Eq + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - /// Constructs a new `Cache` that will store up to the `max_entries`. + /// Constructs a new `Cache` that will store up to the `max_capacity`. /// /// To adjust various configuration knobs such as `initial_capacity` or /// `time_to_live`, use the [`CacheBuilder`][builder-struct]. 
/// /// [builder-struct]: ./struct.CacheBuilder.html - pub fn new(max_entries: usize) -> Self { + pub fn new(max_capacity: usize) -> Self { let build_hasher = RandomState::default(); - Self::with_everything(Some(max_entries), None, build_hasher, None, None, false) + Self::with_everything(Some(max_capacity), None, build_hasher, None, None, false) } pub fn builder() -> CacheBuilder> { @@ -238,7 +238,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { pub(crate) fn with_everything( - max_entries: Option, + max_capacity: Option, initial_capacity: Option, build_hasher: S, time_to_live: Option, @@ -247,7 +247,7 @@ where ) -> Self { Self { base: BaseCache::new( - max_entries, + max_capacity, initial_capacity, build_hasher.clone(), time_to_live, @@ -417,14 +417,9 @@ where self.base.invalidate_entries_if(Arc::new(predicate)) } - /// Returns the `max_entries` of this cache. - pub fn max_entries(&self) -> Option { - self.base.max_entries() - } - - /// Returns the `max_weight` of this cache. - pub fn max_weight(&self) -> Option { - self.base.max_weight() + /// Returns the `max_capacity` of this cache. + pub fn max_capacity(&self) -> Option { + self.base.max_capacity() } /// Returns the `time_to_live` of this cache. @@ -510,6 +505,7 @@ where .await { InitResult::Initialized(v) => { + let hash = self.base.hash(&key); self.insert_with_hash(Arc::clone(&key), hash, v.clone()) .await; self.value_initializer @@ -639,10 +635,16 @@ mod tests { // counts: a -> 1, b -> 1, c -> 1 cache.sync(); + assert_eq!(cache.get(&"a"), Some("alice")); + assert_eq!(cache.get(&"b"), Some("bob")); + assert_eq!(cache.get(&"c"), Some("cindy")); + cache.sync(); + // counts: a -> 2, b -> 2, c -> 2 + assert_eq!(cache.get(&"a"), Some("alice")); assert_eq!(cache.get(&"b"), Some("bob")); cache.sync(); - // counts: a -> 2, b -> 2, c -> 1 + // counts: a -> 3, b -> 3, c -> 2 // "d" should not be admitted because its frequency is too low. cache.insert("d", "david").await; // count: d -> 0 @@ -654,7 +656,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher then c's. + // because d's frequency equals to c's. cache.insert("d", "dennis").await; cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); @@ -686,10 +688,16 @@ mod tests { // counts: a -> 1, b -> 1, c -> 1 cache.sync(); + assert_eq!(cache.get(&"a"), Some("alice")); + assert_eq!(cache.get(&"b"), Some("bob")); + assert_eq!(cache.get(&"c"), Some("cindy")); + cache.sync(); + // counts: a -> 2, b -> 2, c -> 2 + assert_eq!(cache.get(&"a"), Some("alice")); assert_eq!(cache.get(&"b"), Some("bob")); cache.sync(); - // counts: a -> 2, b -> 2, c -> 1 + // counts: a -> 3, b -> 3, c -> 2 // "d" should not be admitted because its frequency is too low. cache.blocking_insert("d", "david"); // count: d -> 0 @@ -701,7 +709,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher then c's. + // because d's frequency equals to c's. 
cache.blocking_insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index b4574bda..2f518f3a 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -83,7 +83,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { pub(crate) fn new( - max_entries: Option, + max_capacity: Option, initial_capacity: Option, build_hasher: S, time_to_live: Option, @@ -93,7 +93,7 @@ where let (r_snd, r_rcv) = crossbeam_channel::bounded(READ_LOG_SIZE); let (w_snd, w_rcv) = crossbeam_channel::bounded(WRITE_LOG_SIZE); let inner = Arc::new(Inner::new( - max_entries, + max_capacity, initial_capacity, build_hasher, r_rcv, @@ -196,12 +196,8 @@ where self.inner.register_invalidation_predicate(predicate, now) } - pub(crate) fn max_entries(&self) -> Option { - self.inner.max_entries() - } - - pub(crate) fn max_weight(&self) -> Option { - self.inner.max_weight() + pub(crate) fn max_capacity(&self) -> Option { + self.inner.max_capacity() } pub(crate) fn time_to_live(&self) -> Option { @@ -356,30 +352,42 @@ struct TotalWeight<'a, K, V> { } impl<'a, K, V> TotalWeight<'a, K, V> { + #[inline] + fn weight(&self, key: &K, value: &V) -> u64 { + self.weighter.map(|w| w(key, value)).unwrap_or(1) + } + + #[inline] fn saturating_add(&mut self, key: &K, value: &V) { - if let Some(weighter) = &self.weighter { - let total = &mut self.total; - let weight = weighter(key, value); - *total = total.saturating_add(weight); - } + let weight = self.weight(key, value); + let total = &mut self.total; + *total = total.saturating_add(weight); } + #[inline] fn saturating_sub(&mut self, key: &K, value: &V) { - if let Some(weighter) = &self.weighter { - let total = &mut self.total; - let weight = weighter(key, value); - *total = total.saturating_sub(weight); - } + let weight = self.weight(key, value); + let total = &mut self.total; + *total = total.saturating_sub(weight); } } +enum AdmissionResult { + Admitted { + victims: Vec>>>, + skipped_victims: Vec>>>, + }, + Rejected { + skipped_victims: Vec>>>, + }, +} + type CacheStore = moka_cht::SegmentedHashMap, Arc>, S>; type CacheEntry = (Arc, Arc>); pub(crate) struct Inner { - max_entries: Option, - max_weight: Option, + max_capacity: Option, total_weight: AtomicU64, cache: CacheStore, build_hasher: S, @@ -407,7 +415,7 @@ where // https://rust-lang.github.io/rust-clippy/master/index.html#too_many_arguments #[allow(clippy::too_many_arguments)] fn new( - max_entries: Option, + max_capacity: Option, initial_capacity: Option, build_hasher: S, read_op_ch: Receiver>, @@ -425,12 +433,11 @@ where initial_capacity, build_hasher.clone(), ); - let skt_capacity = max_entries.map(|n| n * 32).unwrap_or_default().max(100); + let skt_capacity = max_capacity.map(|n| n * 32).unwrap_or_default().max(100); let frequency_sketch = FrequencySketch::with_capacity(skt_capacity); Self { - max_entries, - max_weight: None, + max_capacity: max_capacity.map(|n| n as u64), total_weight: AtomicU64::default(), cache, build_hasher, @@ -485,12 +492,8 @@ where .map(|(key, entry)| KeyValueEntry { key, entry }) } - fn max_entries(&self) -> Option { - self.max_entries - } - - fn max_weight(&self) -> Option { - self.max_weight + fn max_capacity(&self) -> Option { + self.max_capacity.map(|n| n as usize) } #[inline] @@ -663,16 +666,10 @@ where V: Send + Sync + 'static, S: BuildHasher + Clone + Send + Sync + 'static, { - fn has_enough_capacity(&self, total_weight: &TotalWeight<'_, K, V>) -> bool { - let entries = self - .max_entries - 
.map(|limit| self.cache.len() <= limit) - .unwrap_or(true); - let weight = self - .max_weight - .map(|limit| total_weight.total <= limit) - .unwrap_or(true); - entries && weight + fn has_enough_capacity(&self, total_weight: &TotalWeight<'_, K, V>, weight: u64) -> bool { + self.max_capacity + .map(|limit| total_weight.total + weight <= limit) + .unwrap_or(true) } fn apply_reads(&self, deqs: &mut Deques, count: usize) { @@ -723,116 +720,158 @@ where freq: &FrequencySketch, total_weight: &mut TotalWeight<'_, K, V>, ) { - const MAX_RETRY: usize = 5; - let mut tries = 0; - let mut done = false; - entry.set_last_accessed(timestamp); entry.set_last_modified(timestamp); let last_accessed = entry.raw_last_accessed(); let last_modified = entry.raw_last_modified(); - while tries < MAX_RETRY { - tries += 1; + if entry.is_admitted() { + // The entry has been already admitted, so treat this as an update. + deqs.move_to_back_ao(&entry); + deqs.move_to_back_wo(&entry); + return; + } + + let space_needed = total_weight.weight(&kh.key, &entry.value); - if entry.is_admitted() { - // The entry has been already admitted, so treat this as an update. - deqs.move_to_back_ao(&entry); - deqs.move_to_back_wo(&entry); - } else if self.has_enough_capacity(total_weight) { - // There are some room in the cache. Add the candidate to the deques. - self.handle_admit( - kh.clone(), - &entry, - last_accessed, - last_modified, - deqs, - total_weight, - ); - } else { - let victim = match Self::find_cache_victim(deqs, freq) { - // Found a victim. - Some(node) => node, - // Not found a victim. This condition should be unreachable - // because there was no room in the cache. But rather than - // panicking here, admit the candidate as there might be some - // room in te cache now. - None => { - self.handle_admit( - kh.clone(), - &entry, - last_accessed, - last_modified, - deqs, - total_weight, - ); - done = true; - break; - } - }; + if self.has_enough_capacity(total_weight, space_needed) { + // There are enough room in the cache (or the cache is unbounded). + // Add the candidate to the deques. + self.handle_admit(kh, &entry, last_accessed, last_modified, deqs, total_weight); + return; + } + + if let Some(max) = self.max_capacity { + if space_needed > max { + // The candidate is too big to fit in the cache. Reject it. + self.cache.remove(&Arc::clone(&kh.key)); + return; + } + } - if Self::admit(kh.hash, victim, freq) { - // The candidate is admitted. Try to remove the victim from the - // cache (hash map). - if let Some((vic_key, vic_entry)) = self.cache.remove_entry(&victim.element.key) + let skipped; + + // Try to admit the candidate. + match Self::admit(kh.hash, space_needed, &self.cache, deqs, freq, total_weight) { + AdmissionResult::Admitted { + victims, + mut skipped_victims, + } => { + // Try to remove the victims from the cache (hash map). + for victim in victims { + if let Some((vic_key, vic_entry)) = self + .cache + .remove_entry(unsafe { &victim.as_ref().element.key }) { // And then remove the victim from the deques. Self::handle_remove(deqs, &vic_key, vic_entry, total_weight); } else { // Could not remove the victim from the cache. Skip this // victim node as its ValueEntry might have been - // invalidated. Since the invalidated ValueEntry (which - // should be still in the write op queue) has a pointer to - // this node, we move the node to the back of the deque - // instead of unlinking (dropping) it. 
- let victim = NonNull::from(victim); - unsafe { deqs.probation.move_to_back(victim) }; - - continue; // Retry + // invalidated. Add it to the skipped_victim. + skipped_victims.push(victim); } - // Add the candidate to the deques. - self.handle_admit( - kh.clone(), - &entry, - Arc::clone(&last_accessed), - Arc::clone(&last_modified), - deqs, - total_weight, - ); - } else { - // The candidate is not admitted. Remove it from the cache (hash map). - self.cache.remove(&Arc::clone(&kh.key)); } + skipped = skipped_victims; + + // Add the candidate to the deques. + self.handle_admit( + kh, + &entry, + Arc::clone(&last_accessed), + Arc::clone(&last_modified), + deqs, + total_weight, + ); } - done = true; - break; - } + AdmissionResult::Rejected { skipped_victims } => { + skipped = skipped_victims; + // Remove the candidate from the cache (hash map). + self.cache.remove(&Arc::clone(&kh.key)); + } + }; - if !done { - // Too mary retries. Remove the candidate from the cache. - self.cache.remove(&Arc::clone(&kh.key)); + // Move the skipped victim nodes to the back of the deque. We do not unlink + // (drop) these nodes because ValueEntries in the write op queue should be + // pointing them. + for node in skipped { + unsafe { deqs.probation.move_to_back(node) }; } } - #[inline] - fn find_cache_victim<'a>( - deqs: &'a Deques, - _freq: &FrequencySketch, - ) -> Option<&'a DeqNode>> { - // TODO: Check its frequency. If it is not very low, maybe we should - // check frequencies of next few others and pick from them. - deqs.probation.peek_front() - } - #[inline] fn admit( candidate_hash: u64, - victim: &DeqNode>, + space_needed: u64, + cache: &CacheStore, + deqs: &Deques, freq: &FrequencySketch, - ) -> bool { - // TODO: Implement some randomness to mitigate hash DoS attack. + total_weight: &TotalWeight<'_, K, V>, + ) -> AdmissionResult { + let candidate_freq = freq.frequency(candidate_hash) as u32; + let mut victims_freq = 0u32; + let mut victims_size = 0u64; + let mut victims = Vec::default(); + let mut skipped_victims = Vec::default(); + let mut current_victim; + + // Find first victim. + loop { + if let Some(victim) = deqs.probation.peek_front() { + if let Some(vic_entry) = cache.get(&victim.element.key) { + victims_freq += freq.frequency(victim.element.hash) as u32; + victims_size += total_weight.weight(&victim.element.key, &vic_entry.value); + current_victim = victim.next_node(); + victims.push(NonNull::from(victim)); + break; + } else { + // Could not get the victim from the cache. Skip this node as its + // ValueEntry might have been invalidated. + skipped_victims.push(NonNull::from(victim)); + } + } else { + // No more victims. Reject the candidate. + return AdmissionResult::Rejected { skipped_victims }; + } + } + + // Aggregate victims. + while victims_size < space_needed { + if candidate_freq < victims_freq { + break; + } + if let Some(victim) = current_victim.take() { + if let Some(vic_entry) = cache.get(&victim.element.key) { + victims_freq += freq.frequency(victim.element.hash) as u32; + victims_size += total_weight.weight(&victim.element.key, &vic_entry.value); + current_victim = victim.next_node(); + victims.push(NonNull::from(victim)); + } else { + // Could not get the victim from the cache. Skip this node as its + // ValueEntry might have been invalidated. + skipped_victims.push(NonNull::from(victim)); + } + } else { + // No more victims. + break; + } + } + + // Admit or reject the candidate. + + // TODO: Implement some randomness to mitigate hash DoS attack? // See Caffeine's implementation. 
- freq.frequency(candidate_hash) > freq.frequency(victim.element.hash) + + if victims_size >= space_needed && candidate_freq >= victims_freq { + dbg!("admitted"); + AdmissionResult::Admitted { + victims, + skipped_victims, + } + } else { + dbg!("rejected"); + AdmissionResult::Rejected { skipped_victims } + } } fn handle_admit( diff --git a/src/sync/builder.rs b/src/sync/builder.rs index 784a1563..79e709c9 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -39,7 +39,7 @@ use std::{ /// ``` /// pub struct CacheBuilder { - max_entries: Option, + max_capacity: Option, initial_capacity: Option, num_segments: Option, time_to_live: Option, @@ -54,10 +54,10 @@ where V: Clone + Send + Sync + 'static, { /// Construct a new `CacheBuilder` that will be used to build a `Cache` or - /// `SegmentedCache` holding up to `max_entries`. - pub fn new(max_entries: usize) -> Self { + /// `SegmentedCache` holding up to `max_capacity` entries. + pub fn new(max_capacity: usize) -> Self { Self { - max_entries: Some(max_entries), + max_capacity: Some(max_capacity), initial_capacity: None, num_segments: None, time_to_live: None, @@ -76,7 +76,7 @@ where assert!(num_segments > 1); CacheBuilder { - max_entries: self.max_entries, + max_capacity: self.max_capacity, initial_capacity: self.initial_capacity, num_segments: Some(num_segments), time_to_live: self.time_to_live, @@ -93,7 +93,7 @@ where pub fn build(self) -> Cache { let build_hasher = RandomState::default(); Cache::with_everything( - self.max_entries, + self.max_capacity, self.initial_capacity, build_hasher, self.time_to_live, @@ -111,7 +111,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { Cache::with_everything( - self.max_entries, + self.max_capacity, self.initial_capacity, hasher, self.time_to_live, @@ -133,7 +133,7 @@ where pub fn build(self) -> SegmentedCache { let build_hasher = RandomState::default(); SegmentedCache::with_everything( - self.max_entries, + self.max_capacity, self.initial_capacity, self.num_segments.unwrap(), build_hasher, @@ -152,7 +152,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { SegmentedCache::with_everything( - self.max_entries, + self.max_capacity, self.initial_capacity, self.num_segments.unwrap(), hasher, @@ -220,7 +220,7 @@ mod tests { // Cache let cache = CacheBuilder::new(100).build(); - assert_eq!(cache.max_entries(), Some(100)); + assert_eq!(cache.max_capacity(), Some(100)); assert_eq!(cache.time_to_live(), None); assert_eq!(cache.time_to_idle(), None); assert_eq!(cache.num_segments(), 1); @@ -233,7 +233,7 @@ mod tests { .time_to_idle(Duration::from_secs(15 * 60)) .build(); - assert_eq!(cache.max_entries(), Some(100)); + assert_eq!(cache.max_capacity(), Some(100)); assert_eq!(cache.time_to_live(), Some(Duration::from_secs(45 * 60))); assert_eq!(cache.time_to_idle(), Some(Duration::from_secs(15 * 60))); assert_eq!(cache.num_segments(), 1); @@ -247,7 +247,7 @@ mod tests { // SegmentCache let cache = CacheBuilder::new(100).segments(16).build(); - assert_eq!(cache.max_entries(), Some(100)); + assert_eq!(cache.max_capacity(), Some(100)); assert_eq!(cache.time_to_live(), None); assert_eq!(cache.time_to_idle(), None); assert_eq!(cache.num_segments(), 16_usize.next_power_of_two()); @@ -261,7 +261,7 @@ mod tests { .time_to_idle(Duration::from_secs(15 * 60)) .build(); - assert_eq!(cache.max_entries(), Some(100)); + assert_eq!(cache.max_capacity(), Some(100)); assert_eq!(cache.time_to_live(), Some(Duration::from_secs(45 * 60))); assert_eq!(cache.time_to_idle(), Some(Duration::from_secs(15 * 60))); 
assert_eq!(cache.num_segments(), 16_usize.next_power_of_two()); diff --git a/src/sync/cache.rs b/src/sync/cache.rs index 68783f44..d458d82e 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -187,15 +187,15 @@ where K: Hash + Eq + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - /// Constructs a new `Cache` that will store up to the `max_entries`. + /// Constructs a new `Cache` that will store up to the `max_capacity`. /// /// To adjust various configuration knobs such as `initial_capacity` or /// `time_to_live`, use the [`CacheBuilder`][builder-struct]. /// /// [builder-struct]: ./struct.CacheBuilder.html - pub fn new(max_entries: usize) -> Self { + pub fn new(max_capacity: usize) -> Self { let build_hasher = RandomState::default(); - Self::with_everything(Some(max_entries), None, build_hasher, None, None, false) + Self::with_everything(Some(max_capacity), None, build_hasher, None, None, false) } } @@ -206,7 +206,7 @@ where S: BuildHasher + Clone + Send + Sync + 'static, { pub(crate) fn with_everything( - max_entries: Option, + max_capacity: Option, initial_capacity: Option, build_hasher: S, time_to_live: Option, @@ -215,7 +215,7 @@ where ) -> Self { Self { base: BaseCache::new( - max_entries, + max_capacity, initial_capacity, build_hasher.clone(), time_to_live, @@ -421,14 +421,9 @@ where self.base.invalidate_entries_if(predicate) } - /// Returns the `max_entries` of this cache. - pub fn max_entries(&self) -> Option { - self.base.max_entries() - } - - /// Returns the `max_weight` of this cache. - pub fn max_weight(&self) -> Option { - self.base.max_weight() + /// Returns the `max_capacity` of this cache. + pub fn max_capacity(&self) -> Option { + self.base.max_capacity() } /// Returns the `time_to_live` of this cache. @@ -552,10 +547,16 @@ mod tests { // counts: a -> 1, b -> 1, c -> 1 cache.sync(); + assert_eq!(cache.get(&"a"), Some("alice")); + assert_eq!(cache.get(&"b"), Some("bob")); + assert_eq!(cache.get(&"c"), Some("cindy")); + cache.sync(); + // counts: a -> 2, b -> 2, c -> 2 + assert_eq!(cache.get(&"a"), Some("alice")); assert_eq!(cache.get(&"b"), Some("bob")); cache.sync(); - // counts: a -> 2, b -> 2, c -> 1 + // counts: a -> 3, b -> 3, c -> 2 // "d" should not be admitted because its frequency is too low. cache.insert("d", "david"); // count: d -> 0 @@ -567,7 +568,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher then c's. + // because d's frequency equals to c's. cache.insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/sync/segment.rs b/src/sync/segment.rs index 8e1b8692..06a66da4 100644 --- a/src/sync/segment.rs +++ b/src/sync/segment.rs @@ -58,7 +58,7 @@ where V: Clone + Send + Sync + 'static, { /// Constructs a new `SegmentedCache` that has multiple internal - /// segments and will store up to the `max_entries`. + /// segments and will store up to the `max_capacity`. /// /// To adjust various configuration knobs such as `initial_capacity` or /// `time_to_live`, use the [`CacheBuilder`][builder-struct]. @@ -68,10 +68,10 @@ where /// # Panics /// /// Panics if `num_segments` is 0. - pub fn new(max_entries: usize, num_segments: usize) -> Self { + pub fn new(max_capacity: usize, num_segments: usize) -> Self { let build_hasher = RandomState::default(); Self::with_everything( - Some(max_entries), + Some(max_capacity), None, num_segments, build_hasher, @@ -92,7 +92,7 @@ where /// /// Panics if `num_segments` is 0. 
pub(crate) fn with_everything( - max_entries: Option, + max_capacity: Option, initial_capacity: Option, num_segments: usize, build_hasher: S, @@ -102,7 +102,7 @@ where ) -> Self { Self { inner: Arc::new(Inner::new( - max_entries, + max_capacity, initial_capacity, num_segments, build_hasher, @@ -241,9 +241,9 @@ where Ok(()) } - /// Returns the `max_entries` of this cache. - pub fn max_entries(&self) -> Option { - self.inner.desired_max_entries + /// Returns the `max_capacity` of this cache. + pub fn max_capacity(&self) -> Option { + self.inner.desired_capacity } /// Returns the `time_to_live` of this cache. @@ -343,8 +343,7 @@ impl MockExpirationClock { } struct Inner { - desired_max_entries: Option, - // desired_max_weight: Option, + desired_capacity: Option, segments: Box<[Cache]>, build_hasher: S, segment_shift: u32, @@ -360,7 +359,7 @@ where /// /// Panics if `num_segments` is 0. fn new( - max_entries: Option, + max_capacity: Option, initial_capacity: Option, num_segments: usize, build_hasher: S, @@ -373,14 +372,14 @@ where let actual_num_segments = num_segments.next_power_of_two(); let segment_shift = 64 - actual_num_segments.trailing_zeros(); // TODO: Round up. - let seg_max_entries = max_entries.map(|n| n / actual_num_segments); + let seg_max_capacity = max_capacity.map(|n| n / actual_num_segments); let seg_init_capacity = initial_capacity.map(|cap| cap / actual_num_segments); // NOTE: We cannot initialize the segments as `vec![cache; actual_num_segments]` // because Cache::clone() does not clone its inner but shares the same inner. let segments = (0..num_segments) .map(|_| { Cache::with_everything( - seg_max_entries, + seg_max_capacity, seg_init_capacity, build_hasher.clone(), time_to_live, @@ -391,8 +390,7 @@ where .collect::>(); Self { - desired_max_entries: max_entries, - // desired_max_weight: None, + desired_capacity: max_capacity, segments: segments.into_boxed_slice(), build_hasher, segment_shift, @@ -452,10 +450,16 @@ mod tests { // counts: a -> 1, b -> 1, c -> 1 cache.sync(); + assert_eq!(cache.get(&"a"), Some("alice")); + assert_eq!(cache.get(&"b"), Some("bob")); + assert_eq!(cache.get(&"c"), Some("cindy")); + cache.sync(); + // counts: a -> 2, b -> 2, c -> 2 + assert_eq!(cache.get(&"a"), Some("alice")); assert_eq!(cache.get(&"b"), Some("bob")); cache.sync(); - // counts: a -> 2, b -> 2, c -> 1 + // counts: a -> 3, b -> 3, c -> 2 // "d" should not be admitted because its frequency is too low. cache.insert("d", "david"); // count: d -> 0 @@ -467,7 +471,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher then c's. + // because d's frequency equals to c's. cache.insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index ca4128ca..af54aaca 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -444,7 +444,7 @@ where ) -> bool { // TODO: Implement some randomness to mitigate hash DoS attack. // See Caffeine's implementation. 
- freq.frequency(candidate_hash) > freq.frequency(victim.element.hash) + freq.frequency(candidate_hash) >= freq.frequency(victim.element.hash) } fn handle_update( @@ -605,7 +605,12 @@ mod tests { assert_eq!(cache.get(&"a"), Some(&"alice")); assert_eq!(cache.get(&"b"), Some(&"bob")); - // counts: a -> 2, b -> 2, c -> 1 + assert_eq!(cache.get(&"c"), Some(&"cindy")); + // counts: a -> 2, b -> 2, c -> 2 + + assert_eq!(cache.get(&"a"), Some(&"alice")); + assert_eq!(cache.get(&"b"), Some(&"bob")); + // counts: a -> 3, b -> 3, c -> 2 // "d" should not be admitted because its frequency is too low. cache.insert("d", "david"); // count: d -> 0 @@ -615,7 +620,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher then c's. + // because d's frequency equals to c's. cache.insert("d", "dennis"); assert_eq!(cache.get(&"a"), Some(&"alice")); assert_eq!(cache.get(&"b"), Some(&"bob")); From 427e67f8d488580539d936a8e648c4dea4532b98 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 18:25:24 +0800 Subject: [PATCH 08/42] Support cost-based eviction - Rename an internal structure TotalWeight to TotalCost. - Lightly refactored. --- src/sync/base_cache.rs | 163 ++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 85 deletions(-) diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 2f518f3a..7829817a 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -346,29 +346,42 @@ where } } -struct TotalWeight<'a, K, V> { +struct TotalCost<'a, K, V> { total: u64, weighter: Option<&'a Weighter>, } -impl<'a, K, V> TotalWeight<'a, K, V> { +impl<'a, K, V> TotalCost<'a, K, V> { #[inline] - fn weight(&self, key: &K, value: &V) -> u64 { + fn cost(&self, key: &K, value: &V) -> u64 { self.weighter.map(|w| w(key, value)).unwrap_or(1) } #[inline] - fn saturating_add(&mut self, key: &K, value: &V) { - let weight = self.weight(key, value); + fn saturating_add(&mut self, cost: u64) { let total = &mut self.total; - *total = total.saturating_add(weight); + *total = total.saturating_add(cost); } #[inline] fn saturating_sub(&mut self, key: &K, value: &V) { - let weight = self.weight(key, value); + let cost = self.cost(key, value); let total = &mut self.total; - *total = total.saturating_sub(weight); + *total = total.saturating_sub(cost); + } +} + +struct RawTimestamps { + last_accessed: Arc, + last_modified: Arc, +} + +impl RawTimestamps { + fn new(entry: &Arc>) -> Self { + Self { + last_accessed: entry.raw_last_accessed(), + last_modified: entry.raw_last_modified(), + } } } @@ -388,7 +401,7 @@ type CacheEntry = (Arc, Arc>); pub(crate) struct Inner { max_capacity: Option, - total_weight: AtomicU64, + total_cost: AtomicU64, cache: CacheStore, build_hasher: S, deques: Mutex>, @@ -438,7 +451,7 @@ where Self { max_capacity: max_capacity.map(|n| n as u64), - total_weight: AtomicU64::default(), + total_cost: AtomicU64::default(), cache, build_hasher, deques: Mutex::new(Deques::default()), @@ -601,9 +614,9 @@ where let mut calls = 0; let mut should_sync = true; - let current_total_weight = self.total_weight.load(Ordering::Acquire); - let mut total_weight = TotalWeight { - total: current_total_weight, + let current_total_cost = self.total_cost.load(Ordering::Acquire); + let mut total_cost = TotalCost { + total: current_total_cost, weighter: self.weighter.as_ref(), }; @@ -615,7 +628,7 @@ where let w_len = self.write_op_ch.len(); if w_len > 0 { - self.apply_writes(&mut deqs, w_len, &mut 
total_weight); + self.apply_writes(&mut deqs, w_len, &mut total_cost); } calls += 1; should_sync = self.read_op_ch.len() >= READ_LOG_FLUSH_POINT @@ -623,7 +636,7 @@ where } if self.has_expiry() || self.has_valid_after() { - self.evict(&mut deqs, EVICTION_BATCH_SIZE, &mut total_weight); + self.evict(&mut deqs, EVICTION_BATCH_SIZE, &mut total_cost); } if self.invalidator_enabled { @@ -633,18 +646,14 @@ where invalidator, &mut deqs, INVALIDATION_BATCH_SIZE, - &mut total_weight, + &mut total_cost, ); } } } - debug_assert_eq!( - self.total_weight.load(Ordering::Acquire), - current_total_weight - ); - self.total_weight - .store(total_weight.total, Ordering::Release); + debug_assert_eq!(self.total_cost.load(Ordering::Acquire), current_total_cost); + self.total_cost.store(total_cost.total, Ordering::Release); if should_sync { Some(SyncPace::Fast) @@ -666,9 +675,9 @@ where V: Send + Sync + 'static, S: BuildHasher + Clone + Send + Sync + 'static, { - fn has_enough_capacity(&self, total_weight: &TotalWeight<'_, K, V>, weight: u64) -> bool { + fn has_enough_capacity(&self, candidate_cost: u64, total_cost: &TotalCost<'_, K, V>) -> bool { self.max_capacity - .map(|limit| total_weight.total + weight <= limit) + .map(|limit| total_cost.total + candidate_cost <= limit) .unwrap_or(true) } @@ -693,7 +702,7 @@ where &self, deqs: &mut Deques, count: usize, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { use WriteOp::*; let freq = self.frequency_sketch.read(); @@ -702,10 +711,8 @@ where for _ in 0..count { match ch.try_recv() { - Ok(Upsert(kh, entry)) => { - self.handle_upsert(kh, entry, ts, deqs, &freq, total_weight) - } - Ok(Remove(key, entry)) => Self::handle_remove(deqs, &key, entry, total_weight), + Ok(Upsert(kh, entry)) => self.handle_upsert(kh, entry, ts, deqs, &freq, total_cost), + Ok(Remove(key, entry)) => Self::handle_remove(deqs, &key, entry, total_cost), Err(_) => break, }; } @@ -718,12 +725,11 @@ where timestamp: Instant, deqs: &mut Deques, freq: &FrequencySketch, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { entry.set_last_accessed(timestamp); entry.set_last_modified(timestamp); - let last_accessed = entry.raw_last_accessed(); - let last_modified = entry.raw_last_modified(); + let raw_ts = RawTimestamps::new(&entry); if entry.is_admitted() { // The entry has been already admitted, so treat this as an update. @@ -732,17 +738,17 @@ where return; } - let space_needed = total_weight.weight(&kh.key, &entry.value); + let size = total_cost.cost(&kh.key, &entry.value); - if self.has_enough_capacity(total_weight, space_needed) { + if self.has_enough_capacity(size, total_cost) { // There are enough room in the cache (or the cache is unbounded). // Add the candidate to the deques. - self.handle_admit(kh, &entry, last_accessed, last_modified, deqs, total_weight); + self.handle_admit(kh, &entry, size, raw_ts, deqs, total_cost); return; } if let Some(max) = self.max_capacity { - if space_needed > max { + if size > max { // The candidate is too big to fit in the cache. Reject it. self.cache.remove(&Arc::clone(&kh.key)); return; @@ -752,7 +758,7 @@ where let skipped; // Try to admit the candidate. 
- match Self::admit(kh.hash, space_needed, &self.cache, deqs, freq, total_weight) { + match Self::admit(kh.hash, size, &self.cache, deqs, freq, total_cost) { AdmissionResult::Admitted { victims, mut skipped_victims, @@ -764,7 +770,7 @@ where .remove_entry(unsafe { &victim.as_ref().element.key }) { // And then remove the victim from the deques. - Self::handle_remove(deqs, &vic_key, vic_entry, total_weight); + Self::handle_remove(deqs, &vic_key, vic_entry, total_cost); } else { // Could not remove the victim from the cache. Skip this // victim node as its ValueEntry might have been @@ -775,14 +781,7 @@ where skipped = skipped_victims; // Add the candidate to the deques. - self.handle_admit( - kh, - &entry, - Arc::clone(&last_accessed), - Arc::clone(&last_modified), - deqs, - total_weight, - ); + self.handle_admit(kh, &entry, size, raw_ts, deqs, total_cost); } AdmissionResult::Rejected { skipped_victims } => { skipped = skipped_victims; @@ -802,26 +801,26 @@ where #[inline] fn admit( candidate_hash: u64, - space_needed: u64, + candidate_cost: u64, cache: &CacheStore, deqs: &Deques, freq: &FrequencySketch, - total_weight: &TotalWeight<'_, K, V>, + total_cost: &TotalCost<'_, K, V>, ) -> AdmissionResult { let candidate_freq = freq.frequency(candidate_hash) as u32; let mut victims_freq = 0u32; - let mut victims_size = 0u64; + let mut victims_cost = 0u64; let mut victims = Vec::default(); let mut skipped_victims = Vec::default(); - let mut current_victim; + let mut next_victim; // Find first victim. loop { if let Some(victim) = deqs.probation.peek_front() { if let Some(vic_entry) = cache.get(&victim.element.key) { victims_freq += freq.frequency(victim.element.hash) as u32; - victims_size += total_weight.weight(&victim.element.key, &vic_entry.value); - current_victim = victim.next_node(); + victims_cost += total_cost.cost(&victim.element.key, &vic_entry.value); + next_victim = victim.next_node(); victims.push(NonNull::from(victim)); break; } else { @@ -836,15 +835,16 @@ where } // Aggregate victims. - while victims_size < space_needed { + while victims_cost < candidate_cost { if candidate_freq < victims_freq { break; } - if let Some(victim) = current_victim.take() { + if let Some(victim) = next_victim.take() { + next_victim = victim.next_node(); + if let Some(vic_entry) = cache.get(&victim.element.key) { victims_freq += freq.frequency(victim.element.hash) as u32; - victims_size += total_weight.weight(&victim.element.key, &vic_entry.value); - current_victim = victim.next_node(); + victims_cost += total_cost.cost(&victim.element.key, &vic_entry.value); victims.push(NonNull::from(victim)); } else { // Could not get the victim from the cache. Skip this node as its @@ -862,14 +862,12 @@ where // TODO: Implement some randomness to mitigate hash DoS attack? // See Caffeine's implementation. 
- if victims_size >= space_needed && candidate_freq >= victims_freq { - dbg!("admitted"); + if victims_cost >= candidate_cost && candidate_freq >= victims_freq { AdmissionResult::Admitted { victims, skipped_victims, } } else { - dbg!("rejected"); AdmissionResult::Rejected { skipped_victims } } } @@ -878,20 +876,20 @@ where &self, kh: KeyHash, entry: &Arc>, - raw_last_accessed: Arc, - raw_last_modified: Arc, + cost: u64, + raw_ts: RawTimestamps, deqs: &mut Deques, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { let key = Arc::clone(&kh.key); - total_weight.saturating_add(&key, &entry.value); + total_cost.saturating_add(cost); deqs.push_back_ao( CacheRegion::MainProbation, - KeyHashDate::new(kh, raw_last_accessed), + KeyHashDate::new(kh, raw_ts.last_accessed), entry, ); if self.is_write_order_queue_enabled() { - deqs.push_back_wo(KeyDate::new(key, raw_last_modified), entry); + deqs.push_back_wo(KeyDate::new(key, raw_ts.last_modified), entry); } entry.set_is_admitted(true); } @@ -900,11 +898,11 @@ where deqs: &mut Deques, key: &Arc, entry: Arc>, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { if entry.is_admitted() { entry.set_is_admitted(false); - total_weight.saturating_sub(key, &entry.value); + total_cost.saturating_sub(key, &entry.value); deqs.unlink_ao(&entry); Deques::unlink_wo(&mut deqs.write_order, &entry); } @@ -917,27 +915,22 @@ where wo_deq: &mut Deque>, key: &Arc, entry: Arc>, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { if entry.is_admitted() { entry.set_is_admitted(false); - total_weight.saturating_sub(key, &entry.value); + total_cost.saturating_sub(key, &entry.value); Deques::unlink_ao_from_deque(ao_deq_name, ao_deq, &entry); Deques::unlink_wo(wo_deq, &entry); } entry.unset_q_nodes(); } - fn evict( - &self, - deqs: &mut Deques, - batch_size: usize, - total_weight: &mut TotalWeight<'_, K, V>, - ) { + fn evict(&self, deqs: &mut Deques, batch_size: usize, total_cost: &mut TotalCost<'_, K, V>) { let now = self.current_time_from_expiration_clock(); if self.is_write_order_queue_enabled() { - self.remove_expired_wo(deqs, batch_size, now, total_weight); + self.remove_expired_wo(deqs, batch_size, now, total_cost); } if self.time_to_idle.is_some() || self.has_valid_after() { @@ -949,7 +942,7 @@ where ); let mut rm_expired_ao = - |name, deq| self.remove_expired_ao(name, deq, wo, batch_size, now, total_weight); + |name, deq| self.remove_expired_ao(name, deq, wo, batch_size, now, total_cost); rm_expired_ao("window", window); rm_expired_ao("probation", probation); @@ -965,7 +958,7 @@ where write_order_deq: &mut Deque>, batch_size: usize, now: Instant, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { let tti = &self.time_to_idle; let va = self.valid_after(); @@ -1006,7 +999,7 @@ where write_order_deq, key, entry, - total_weight, + total_cost, ); } else if let Some(entry) = self.cache.get(key) { let ts = entry.last_accessed(); @@ -1037,7 +1030,7 @@ where deqs: &mut Deques, batch_size: usize, now: Instant, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { let ttl = &self.time_to_live; let va = self.valid_after(); @@ -1068,7 +1061,7 @@ where .remove_if(key, |_, v| is_expired_entry_wo(ttl, va, v, now)); if let Some(entry) = maybe_entry { - Self::handle_remove(deqs, key, entry, total_weight); + Self::handle_remove(deqs, key, entry, total_cost); } else if let Some(entry) = self.cache.get(key) { let ts 
= entry.last_modified(); if ts.is_none() { @@ -1096,9 +1089,9 @@ where invalidator: &Invalidator, deqs: &mut Deques, batch_size: usize, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { - self.process_invalidation_result(invalidator, deqs, total_weight); + self.process_invalidation_result(invalidator, deqs, total_cost); self.submit_invalidation_task(invalidator, &mut deqs.write_order, batch_size); } @@ -1106,7 +1099,7 @@ where &self, invalidator: &Invalidator, deqs: &mut Deques, - total_weight: &mut TotalWeight<'_, K, V>, + total_cost: &mut TotalCost<'_, K, V>, ) { if let Some(InvalidationResult { invalidated, @@ -1114,7 +1107,7 @@ where }) = invalidator.task_result() { for KeyValueEntry { key, entry } in invalidated { - Self::handle_remove(deqs, &key, entry, total_weight); + Self::handle_remove(deqs, &key, entry, total_cost); } if is_done { deqs.write_order.reset_cursor(); From 9d0e70223cda2ac3d6a5f8c1d3e1c11d0f1e8e3f Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 22:12:56 +0800 Subject: [PATCH 09/42] Size-aware cache management - Rename many internal items, for example: - TotalCost struct -> WeightedSize struct - cost() method -> weigh() method - cost variable -> policy_weight variable - NOTE: Cost usually means the miss penalty in cache. - Add EntrySizeAndFrequency struct for admit() method. --- src/sync/base_cache.rs | 215 +++++++++++++++++++++-------------------- 1 file changed, 112 insertions(+), 103 deletions(-) diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 7829817a..fbb17967 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -346,28 +346,51 @@ where } } -struct TotalCost<'a, K, V> { - total: u64, +struct WeightedSize<'a, K, V> { + size: u64, weighter: Option<&'a Weighter>, } -impl<'a, K, V> TotalCost<'a, K, V> { +impl<'a, K, V> WeightedSize<'a, K, V> { #[inline] - fn cost(&self, key: &K, value: &V) -> u64 { + fn weigh(&self, key: &K, value: &V) -> u64 { self.weighter.map(|w| w(key, value)).unwrap_or(1) } #[inline] - fn saturating_add(&mut self, cost: u64) { - let total = &mut self.total; - *total = total.saturating_add(cost); + fn saturating_add(&mut self, weight: u64) { + let total = &mut self.size; + *total = total.saturating_add(weight); } #[inline] fn saturating_sub(&mut self, key: &K, value: &V) { - let cost = self.cost(key, value); - let total = &mut self.total; - *total = total.saturating_sub(cost); + let weight = self.weigh(key, value); + let total = &mut self.size; + *total = total.saturating_sub(weight); + } +} + +#[derive(Default)] +struct EntrySizeAndFrequency { + weight: u64, + freq: u32, +} + +impl EntrySizeAndFrequency { + fn new(policy_weight: u64) -> Self { + Self { + weight: policy_weight, + ..Default::default() + } + } + + fn add_policy_weight(&mut self, ws: &WeightedSize<'_, K, V>, key: &K, value: &V) { + self.weight += ws.weigh(key, value); + } + + fn add_frequency(&mut self, freq: &FrequencySketch, hash: u64) { + self.freq += freq.frequency(hash) as u32; } } @@ -387,11 +410,11 @@ impl RawTimestamps { enum AdmissionResult { Admitted { - victims: Vec>>>, - skipped_victims: Vec>>>, + victim_nodes: Vec>>>, + skipped_nodes: Vec>>>, }, Rejected { - skipped_victims: Vec>>>, + skipped_nodes: Vec>>>, }, } @@ -401,7 +424,7 @@ type CacheEntry = (Arc, Arc>); pub(crate) struct Inner { max_capacity: Option, - total_cost: AtomicU64, + weighted_size: AtomicU64, cache: CacheStore, build_hasher: S, deques: Mutex>, @@ -451,7 +474,7 @@ where Self { max_capacity: max_capacity.map(|n| 
n as u64), - total_cost: AtomicU64::default(), + weighted_size: AtomicU64::default(), cache, build_hasher, deques: Mutex::new(Deques::default()), @@ -614,9 +637,9 @@ where let mut calls = 0; let mut should_sync = true; - let current_total_cost = self.total_cost.load(Ordering::Acquire); - let mut total_cost = TotalCost { - total: current_total_cost, + let current_ws = self.weighted_size.load(Ordering::Acquire); + let mut ws = WeightedSize { + size: current_ws, weighter: self.weighter.as_ref(), }; @@ -628,7 +651,7 @@ where let w_len = self.write_op_ch.len(); if w_len > 0 { - self.apply_writes(&mut deqs, w_len, &mut total_cost); + self.apply_writes(&mut deqs, w_len, &mut ws); } calls += 1; should_sync = self.read_op_ch.len() >= READ_LOG_FLUSH_POINT @@ -636,7 +659,7 @@ where } if self.has_expiry() || self.has_valid_after() { - self.evict(&mut deqs, EVICTION_BATCH_SIZE, &mut total_cost); + self.evict(&mut deqs, EVICTION_BATCH_SIZE, &mut ws); } if self.invalidator_enabled { @@ -646,14 +669,14 @@ where invalidator, &mut deqs, INVALIDATION_BATCH_SIZE, - &mut total_cost, + &mut ws, ); } } } - debug_assert_eq!(self.total_cost.load(Ordering::Acquire), current_total_cost); - self.total_cost.store(total_cost.total, Ordering::Release); + debug_assert_eq!(self.weighted_size.load(Ordering::Acquire), current_ws); + self.weighted_size.store(ws.size, Ordering::Release); if should_sync { Some(SyncPace::Fast) @@ -675,9 +698,9 @@ where V: Send + Sync + 'static, S: BuildHasher + Clone + Send + Sync + 'static, { - fn has_enough_capacity(&self, candidate_cost: u64, total_cost: &TotalCost<'_, K, V>) -> bool { + fn has_enough_capacity(&self, candidate_weight: u64, ws: &WeightedSize<'_, K, V>) -> bool { self.max_capacity - .map(|limit| total_cost.total + candidate_cost <= limit) + .map(|limit| ws.size + candidate_weight <= limit) .unwrap_or(true) } @@ -698,12 +721,7 @@ where } } - fn apply_writes( - &self, - deqs: &mut Deques, - count: usize, - total_cost: &mut TotalCost<'_, K, V>, - ) { + fn apply_writes(&self, deqs: &mut Deques, count: usize, ws: &mut WeightedSize<'_, K, V>) { use WriteOp::*; let freq = self.frequency_sketch.read(); let ch = &self.write_op_ch; @@ -711,8 +729,8 @@ where for _ in 0..count { match ch.try_recv() { - Ok(Upsert(kh, entry)) => self.handle_upsert(kh, entry, ts, deqs, &freq, total_cost), - Ok(Remove(key, entry)) => Self::handle_remove(deqs, &key, entry, total_cost), + Ok(Upsert(kh, entry)) => self.handle_upsert(kh, entry, ts, deqs, &freq, ws), + Ok(Remove(key, entry)) => Self::handle_remove(deqs, &key, entry, ws), Err(_) => break, }; } @@ -725,7 +743,7 @@ where timestamp: Instant, deqs: &mut Deques, freq: &FrequencySketch, - total_cost: &mut TotalCost<'_, K, V>, + ws: &mut WeightedSize<'_, K, V>, ) { entry.set_last_accessed(timestamp); entry.set_last_modified(timestamp); @@ -738,118 +756,116 @@ where return; } - let size = total_cost.cost(&kh.key, &entry.value); + let policy_weight = ws.weigh(&kh.key, &entry.value); - if self.has_enough_capacity(size, total_cost) { + if self.has_enough_capacity(policy_weight, ws) { // There are enough room in the cache (or the cache is unbounded). // Add the candidate to the deques. - self.handle_admit(kh, &entry, size, raw_ts, deqs, total_cost); + self.handle_admit(kh, &entry, policy_weight, raw_ts, deqs, ws); return; } if let Some(max) = self.max_capacity { - if size > max { + if policy_weight > max { // The candidate is too big to fit in the cache. Reject it. 
self.cache.remove(&Arc::clone(&kh.key)); return; } } - let skipped; + let skipped_nodes; + let mut candidate = EntrySizeAndFrequency::new(policy_weight); + candidate.add_frequency(freq, kh.hash); // Try to admit the candidate. - match Self::admit(kh.hash, size, &self.cache, deqs, freq, total_cost) { + match Self::admit(&candidate, &self.cache, deqs, freq, ws) { AdmissionResult::Admitted { - victims, - mut skipped_victims, + victim_nodes, + skipped_nodes: mut skipped, } => { // Try to remove the victims from the cache (hash map). - for victim in victims { + for victim in victim_nodes { if let Some((vic_key, vic_entry)) = self .cache .remove_entry(unsafe { &victim.as_ref().element.key }) { // And then remove the victim from the deques. - Self::handle_remove(deqs, &vic_key, vic_entry, total_cost); + Self::handle_remove(deqs, &vic_key, vic_entry, ws); } else { // Could not remove the victim from the cache. Skip this // victim node as its ValueEntry might have been - // invalidated. Add it to the skipped_victim. - skipped_victims.push(victim); + // invalidated. Add it to the skipped nodes. + skipped.push(victim); } } - skipped = skipped_victims; + skipped_nodes = skipped; // Add the candidate to the deques. - self.handle_admit(kh, &entry, size, raw_ts, deqs, total_cost); + self.handle_admit(kh, &entry, policy_weight, raw_ts, deqs, ws); } - AdmissionResult::Rejected { skipped_victims } => { - skipped = skipped_victims; + AdmissionResult::Rejected { skipped_nodes: s } => { + skipped_nodes = s; // Remove the candidate from the cache (hash map). self.cache.remove(&Arc::clone(&kh.key)); } }; - // Move the skipped victim nodes to the back of the deque. We do not unlink - // (drop) these nodes because ValueEntries in the write op queue should be - // pointing them. - for node in skipped { + // Move the skipped nodes to the back of the deque. We do not unlink (drop) + // them because ValueEntries in the write op queue should be pointing them. + for node in skipped_nodes { unsafe { deqs.probation.move_to_back(node) }; } } #[inline] fn admit( - candidate_hash: u64, - candidate_cost: u64, + candidate: &EntrySizeAndFrequency, cache: &CacheStore, deqs: &Deques, freq: &FrequencySketch, - total_cost: &TotalCost<'_, K, V>, + ws: &WeightedSize<'_, K, V>, ) -> AdmissionResult { - let candidate_freq = freq.frequency(candidate_hash) as u32; - let mut victims_freq = 0u32; - let mut victims_cost = 0u64; - let mut victims = Vec::default(); - let mut skipped_victims = Vec::default(); + let mut victims = EntrySizeAndFrequency::default(); + let mut victim_nodes = Vec::default(); + let mut skipped_nodes = Vec::default(); let mut next_victim; // Find first victim. loop { if let Some(victim) = deqs.probation.peek_front() { if let Some(vic_entry) = cache.get(&victim.element.key) { - victims_freq += freq.frequency(victim.element.hash) as u32; - victims_cost += total_cost.cost(&victim.element.key, &vic_entry.value); + victims.add_policy_weight(ws, &victim.element.key, &vic_entry.value); + victims.add_frequency(freq, victim.element.hash); next_victim = victim.next_node(); - victims.push(NonNull::from(victim)); + victim_nodes.push(NonNull::from(victim)); break; } else { // Could not get the victim from the cache. Skip this node as its // ValueEntry might have been invalidated. - skipped_victims.push(NonNull::from(victim)); + skipped_nodes.push(NonNull::from(victim)); } } else { // No more victims. Reject the candidate. 
- return AdmissionResult::Rejected { skipped_victims }; + return AdmissionResult::Rejected { skipped_nodes }; } } // Aggregate victims. - while victims_cost < candidate_cost { - if candidate_freq < victims_freq { + while victims.weight < candidate.weight { + if candidate.freq < victims.freq { break; } if let Some(victim) = next_victim.take() { next_victim = victim.next_node(); if let Some(vic_entry) = cache.get(&victim.element.key) { - victims_freq += freq.frequency(victim.element.hash) as u32; - victims_cost += total_cost.cost(&victim.element.key, &vic_entry.value); - victims.push(NonNull::from(victim)); + victims.add_policy_weight(ws, &victim.element.key, &vic_entry.value); + victims.add_frequency(freq, victim.element.hash); + victim_nodes.push(NonNull::from(victim)); } else { // Could not get the victim from the cache. Skip this node as its // ValueEntry might have been invalidated. - skipped_victims.push(NonNull::from(victim)); + skipped_nodes.push(NonNull::from(victim)); } } else { // No more victims. @@ -859,16 +875,16 @@ where // Admit or reject the candidate. - // TODO: Implement some randomness to mitigate hash DoS attack? + // TODO: Implement some randomness to mitigate hash DoS attack. // See Caffeine's implementation. - if victims_cost >= candidate_cost && candidate_freq >= victims_freq { + if victims.weight >= candidate.weight && candidate.freq >= victims.freq { AdmissionResult::Admitted { - victims, - skipped_victims, + victim_nodes, + skipped_nodes, } } else { - AdmissionResult::Rejected { skipped_victims } + AdmissionResult::Rejected { skipped_nodes } } } @@ -876,13 +892,13 @@ where &self, kh: KeyHash, entry: &Arc>, - cost: u64, + policy_weight: u64, raw_ts: RawTimestamps, deqs: &mut Deques, - total_cost: &mut TotalCost<'_, K, V>, + ws: &mut WeightedSize<'_, K, V>, ) { let key = Arc::clone(&kh.key); - total_cost.saturating_add(cost); + ws.saturating_add(policy_weight); deqs.push_back_ao( CacheRegion::MainProbation, KeyHashDate::new(kh, raw_ts.last_accessed), @@ -898,11 +914,11 @@ where deqs: &mut Deques, key: &Arc, entry: Arc>, - total_cost: &mut TotalCost<'_, K, V>, + ws: &mut WeightedSize<'_, K, V>, ) { if entry.is_admitted() { entry.set_is_admitted(false); - total_cost.saturating_sub(key, &entry.value); + ws.saturating_sub(key, &entry.value); deqs.unlink_ao(&entry); Deques::unlink_wo(&mut deqs.write_order, &entry); } @@ -915,22 +931,22 @@ where wo_deq: &mut Deque>, key: &Arc, entry: Arc>, - total_cost: &mut TotalCost<'_, K, V>, + ws: &mut WeightedSize<'_, K, V>, ) { if entry.is_admitted() { entry.set_is_admitted(false); - total_cost.saturating_sub(key, &entry.value); + ws.saturating_sub(key, &entry.value); Deques::unlink_ao_from_deque(ao_deq_name, ao_deq, &entry); Deques::unlink_wo(wo_deq, &entry); } entry.unset_q_nodes(); } - fn evict(&self, deqs: &mut Deques, batch_size: usize, total_cost: &mut TotalCost<'_, K, V>) { + fn evict(&self, deqs: &mut Deques, batch_size: usize, ws: &mut WeightedSize<'_, K, V>) { let now = self.current_time_from_expiration_clock(); if self.is_write_order_queue_enabled() { - self.remove_expired_wo(deqs, batch_size, now, total_cost); + self.remove_expired_wo(deqs, batch_size, now, ws); } if self.time_to_idle.is_some() || self.has_valid_after() { @@ -942,7 +958,7 @@ where ); let mut rm_expired_ao = - |name, deq| self.remove_expired_ao(name, deq, wo, batch_size, now, total_cost); + |name, deq| self.remove_expired_ao(name, deq, wo, batch_size, now, ws); rm_expired_ao("window", window); rm_expired_ao("probation", probation); @@ -958,7 +974,7 @@ 
where write_order_deq: &mut Deque>, batch_size: usize, now: Instant, - total_cost: &mut TotalCost<'_, K, V>, + ws: &mut WeightedSize<'_, K, V>, ) { let tti = &self.time_to_idle; let va = self.valid_after(); @@ -993,14 +1009,7 @@ where .remove_if(key, |_, v| is_expired_entry_ao(tti, va, v, now)); if let Some(entry) = maybe_entry { - Self::handle_remove_with_deques( - deq_name, - deq, - write_order_deq, - key, - entry, - total_cost, - ); + Self::handle_remove_with_deques(deq_name, deq, write_order_deq, key, entry, ws); } else if let Some(entry) = self.cache.get(key) { let ts = entry.last_accessed(); if ts.is_none() { @@ -1030,7 +1039,7 @@ where deqs: &mut Deques, batch_size: usize, now: Instant, - total_cost: &mut TotalCost<'_, K, V>, + ws: &mut WeightedSize<'_, K, V>, ) { let ttl = &self.time_to_live; let va = self.valid_after(); @@ -1061,7 +1070,7 @@ where .remove_if(key, |_, v| is_expired_entry_wo(ttl, va, v, now)); if let Some(entry) = maybe_entry { - Self::handle_remove(deqs, key, entry, total_cost); + Self::handle_remove(deqs, key, entry, ws); } else if let Some(entry) = self.cache.get(key) { let ts = entry.last_modified(); if ts.is_none() { @@ -1089,9 +1098,9 @@ where invalidator: &Invalidator, deqs: &mut Deques, batch_size: usize, - total_cost: &mut TotalCost<'_, K, V>, + ws: &mut WeightedSize<'_, K, V>, ) { - self.process_invalidation_result(invalidator, deqs, total_cost); + self.process_invalidation_result(invalidator, deqs, ws); self.submit_invalidation_task(invalidator, &mut deqs.write_order, batch_size); } @@ -1099,7 +1108,7 @@ where &self, invalidator: &Invalidator, deqs: &mut Deques, - total_cost: &mut TotalCost<'_, K, V>, + ws: &mut WeightedSize<'_, K, V>, ) { if let Some(InvalidationResult { invalidated, @@ -1107,7 +1116,7 @@ where }) = invalidator.task_result() { for KeyValueEntry { key, entry } in invalidated { - Self::handle_remove(deqs, &key, entry, total_cost); + Self::handle_remove(deqs, &key, entry, ws); } if is_done { deqs.write_order.reset_cursor(); From a6a2c0d47b62bde614342ae5fcd064a28e696281 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 22:56:59 +0800 Subject: [PATCH 10/42] Size-aware cache management In order to reduce chances to pollute the cache main space, restore the old admission behavior, which requires the candidate frequency must be higher than the aggregated frequency of potential victims. 
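For illustration, a minimal standalone sketch of the restored rule (the function and its
parameters are made up for this example and are not the crate's internal API; the
comparison itself mirrors the `admit` method in the diff below):

    /// Returns true when the candidate may displace the aggregated victims:
    /// the victims must free at least the candidate's weight, and the
    /// candidate's estimated frequency must be strictly higher than theirs.
    fn admits(candidate_weight: u64, candidate_freq: u32,
              victims_weight: u64, victims_freq: u32) -> bool {
        victims_weight >= candidate_weight && candidate_freq > victims_freq
    }

    fn main() {
        // Equal frequencies are now rejected (`>` rather than `>=`), which is
        // what keeps a burst of one-off keys from polluting the main space.
        assert!(!admits(10, 3, 15, 3));
        // A strictly more popular candidate is still admitted.
        assert!(admits(10, 4, 15, 3));
    }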
--- .vscode/settings.json | 3 +++ src/future/cache.rs | 20 ++++---------------- src/sync/base_cache.rs | 27 ++++++++++++++++++++++----- src/sync/cache.rs | 10 ++-------- src/sync/segment.rs | 10 ++-------- src/unsync/cache.rs | 11 +++-------- 6 files changed, 36 insertions(+), 45 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index fc424e74..e407493a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,10 +4,13 @@ "CLFU", "Deque", "Deques", + "Einziger", + "Eytan", "Hasher", "Kawano", "MSRV", "Moka", + "Ohad", "RUSTFLAGS", "Ristretto", "Tatsuya", diff --git a/src/future/cache.rs b/src/future/cache.rs index a9bfe072..743c8a87 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -635,16 +635,10 @@ mod tests { // counts: a -> 1, b -> 1, c -> 1 cache.sync(); - assert_eq!(cache.get(&"a"), Some("alice")); - assert_eq!(cache.get(&"b"), Some("bob")); - assert_eq!(cache.get(&"c"), Some("cindy")); - cache.sync(); - // counts: a -> 2, b -> 2, c -> 2 - assert_eq!(cache.get(&"a"), Some("alice")); assert_eq!(cache.get(&"b"), Some("bob")); cache.sync(); - // counts: a -> 3, b -> 3, c -> 2 + // counts: a -> 2, b -> 2, c -> 1 // "d" should not be admitted because its frequency is too low. cache.insert("d", "david").await; // count: d -> 0 @@ -656,7 +650,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency equals to c's. + // because d's frequency is higher than to c's. cache.insert("d", "dennis").await; cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); @@ -688,16 +682,10 @@ mod tests { // counts: a -> 1, b -> 1, c -> 1 cache.sync(); - assert_eq!(cache.get(&"a"), Some("alice")); - assert_eq!(cache.get(&"b"), Some("bob")); - assert_eq!(cache.get(&"c"), Some("cindy")); - cache.sync(); - // counts: a -> 2, b -> 2, c -> 2 - assert_eq!(cache.get(&"a"), Some("alice")); assert_eq!(cache.get(&"b"), Some("bob")); cache.sync(); - // counts: a -> 3, b -> 3, c -> 2 + // counts: a -> 2, b -> 2, c -> 1 // "d" should not be admitted because its frequency is too low. cache.blocking_insert("d", "david"); // count: d -> 0 @@ -709,7 +697,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency equals to c's. + // because d's frequency is higher than to c's. cache.blocking_insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index fbb17967..54a5cc41 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -817,6 +817,23 @@ where } } + /// Performs size-aware admission explained in the paper: + /// [Lightweight Robust Size Aware Cache Management][size-aware-cache-paper] + /// by Gil Einziger, Ohad Eytan, Roy Friedman, Ben Manes. + /// + /// [size-aware-cache-paper]: https://arxiv.org/abs/2105.08770 + /// + /// There are some modifications in this implementation: + /// - To admit to the main space, candidate's frequency must be higher than + /// the aggregated frequencies of the potential victims. (In the paper, + /// `>=` operator is used rather than `>`) The `>` operator will do a better + /// job to prevent the main space from polluting. + /// - When a candidate is rejected, the potential victims will stay at the LRU + /// position of the probation access-order queue. (In the paper, they will be + /// promoted (to the MRU position?) 
to force the eviction policy to select a + /// different set of victims for the next candidate). We may implement the + /// paper's behavior later? + /// #[inline] fn admit( candidate: &EntrySizeAndFrequency, @@ -840,8 +857,8 @@ where victim_nodes.push(NonNull::from(victim)); break; } else { - // Could not get the victim from the cache. Skip this node as its - // ValueEntry might have been invalidated. + // Could not get the victim from the cache (hash map). Skip this node + // as its ValueEntry might have been invalidated. skipped_nodes.push(NonNull::from(victim)); } } else { @@ -863,8 +880,8 @@ where victims.add_frequency(freq, victim.element.hash); victim_nodes.push(NonNull::from(victim)); } else { - // Could not get the victim from the cache. Skip this node as its - // ValueEntry might have been invalidated. + // Could not get the victim from the cache (hash map). Skip this node + // as its ValueEntry might have been invalidated. skipped_nodes.push(NonNull::from(victim)); } } else { @@ -878,7 +895,7 @@ where // TODO: Implement some randomness to mitigate hash DoS attack. // See Caffeine's implementation. - if victims.weight >= candidate.weight && candidate.freq >= victims.freq { + if victims.weight >= candidate.weight && candidate.freq > victims.freq { AdmissionResult::Admitted { victim_nodes, skipped_nodes, diff --git a/src/sync/cache.rs b/src/sync/cache.rs index d458d82e..2297ed35 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -547,16 +547,10 @@ mod tests { // counts: a -> 1, b -> 1, c -> 1 cache.sync(); - assert_eq!(cache.get(&"a"), Some("alice")); - assert_eq!(cache.get(&"b"), Some("bob")); - assert_eq!(cache.get(&"c"), Some("cindy")); - cache.sync(); - // counts: a -> 2, b -> 2, c -> 2 - assert_eq!(cache.get(&"a"), Some("alice")); assert_eq!(cache.get(&"b"), Some("bob")); cache.sync(); - // counts: a -> 3, b -> 3, c -> 2 + // counts: a -> 2, b -> 2, c -> 1 // "d" should not be admitted because its frequency is too low. cache.insert("d", "david"); // count: d -> 0 @@ -568,7 +562,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency equals to c's. + // because d's frequency is higher than to c's. cache.insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/sync/segment.rs b/src/sync/segment.rs index 06a66da4..10ed1974 100644 --- a/src/sync/segment.rs +++ b/src/sync/segment.rs @@ -450,16 +450,10 @@ mod tests { // counts: a -> 1, b -> 1, c -> 1 cache.sync(); - assert_eq!(cache.get(&"a"), Some("alice")); - assert_eq!(cache.get(&"b"), Some("bob")); - assert_eq!(cache.get(&"c"), Some("cindy")); - cache.sync(); - // counts: a -> 2, b -> 2, c -> 2 - assert_eq!(cache.get(&"a"), Some("alice")); assert_eq!(cache.get(&"b"), Some("bob")); cache.sync(); - // counts: a -> 3, b -> 3, c -> 2 + // counts: a -> 2, b -> 2, c -> 1 // "d" should not be admitted because its frequency is too low. cache.insert("d", "david"); // count: d -> 0 @@ -471,7 +465,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency equals to c's. + // because d's frequency is higher than to c's. 
cache.insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index af54aaca..430445b2 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -444,7 +444,7 @@ where ) -> bool { // TODO: Implement some randomness to mitigate hash DoS attack. // See Caffeine's implementation. - freq.frequency(candidate_hash) >= freq.frequency(victim.element.hash) + freq.frequency(candidate_hash) > freq.frequency(victim.element.hash) } fn handle_update( @@ -605,12 +605,7 @@ mod tests { assert_eq!(cache.get(&"a"), Some(&"alice")); assert_eq!(cache.get(&"b"), Some(&"bob")); - assert_eq!(cache.get(&"c"), Some(&"cindy")); - // counts: a -> 2, b -> 2, c -> 2 - - assert_eq!(cache.get(&"a"), Some(&"alice")); - assert_eq!(cache.get(&"b"), Some(&"bob")); - // counts: a -> 3, b -> 3, c -> 2 + // counts: a -> 2, b -> 2, c -> 1 // "d" should not be admitted because its frequency is too low. cache.insert("d", "david"); // count: d -> 0 @@ -620,7 +615,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency equals to c's. + // because d's frequency is higher than to c's. cache.insert("d", "dennis"); assert_eq!(cache.get(&"a"), Some(&"alice")); assert_eq!(cache.get(&"b"), Some(&"bob")); From e261968a5f3b97fefe22cfd54cd0061ebe9b3144 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 23:17:15 +0800 Subject: [PATCH 11/42] Minor refactoring - Rename KeyValueEntry struct to KVEntry. - Updated WriteOp::Remove enum variant to store KVEntry. --- src/future/cache.rs | 10 +++++----- src/sync.rs | 10 ++++++++-- src/sync/base_cache.rs | 12 ++++++------ src/sync/cache.rs | 6 +++--- src/sync/invalidator.rs | 10 +++++----- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/future/cache.rs b/src/future/cache.rs index 743c8a87..d9cc8134 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -6,7 +6,7 @@ use crate::{ sync::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, - KeyValueEntry, PredicateId, WriteOp, + PredicateId, WriteOp, }, PredicateError, }; @@ -340,8 +340,8 @@ where Arc: Borrow, Q: Hash + Eq + ?Sized, { - if let Some(KeyValueEntry { key, entry }) = self.base.remove_entry(key) { - let op = WriteOp::Remove(key, entry); + if let Some(kv) = self.base.remove_entry(key) { + let op = WriteOp::Remove(kv); let hk = self.base.housekeeper.as_ref(); if Self::schedule_write_op(&self.base.write_op_ch, op, hk) .await @@ -362,8 +362,8 @@ where Arc: Borrow, Q: Hash + Eq + ?Sized, { - if let Some(KeyValueEntry { key, entry }) = self.base.remove_entry(key) { - let op = WriteOp::Remove(key, entry); + if let Some(kv) = self.base.remove_entry(key) { + let op = WriteOp::Remove(kv); let hk = self.base.housekeeper.as_ref(); if Self::blocking_schedule_write_op(&self.base.write_op_ch, op, hk).is_err() { panic!("Failed to remove"); diff --git a/src/sync.rs b/src/sync.rs index f5f43f8e..373486fb 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -92,11 +92,17 @@ impl KeyHashDate { } } -pub(crate) struct KeyValueEntry { +pub(crate) struct KVEntry { pub(crate) key: Arc, pub(crate) entry: Arc>, } +impl KVEntry { + pub(crate) fn new(key: Arc, entry: Arc>) -> Self { + Self { key, entry } + } +} + // DeqNode for an access order queue. 
type KeyDeqNodeAo = NonNull>>; @@ -284,5 +290,5 @@ pub(crate) enum ReadOp { pub(crate) enum WriteOp { Upsert(KeyHash, Arc>), - Remove(Arc, Arc>), + Remove(KVEntry), } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 54a5cc41..11d069a4 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -2,7 +2,7 @@ use super::{ deques::Deques, housekeeper::{Housekeeper, InnerSync, SyncPace}, invalidator::{GetOrRemoveEntry, InvalidationResult, Invalidator, KeyDateLite, PredicateFun}, - KeyDate, KeyHash, KeyHashDate, KeyValueEntry, PredicateId, ReadOp, ValueEntry, WriteOp, + KVEntry, KeyDate, KeyHash, KeyHashDate, PredicateId, ReadOp, ValueEntry, WriteOp, }; use crate::{ common::{ @@ -161,7 +161,7 @@ where } #[inline] - pub(crate) fn remove_entry(&self, key: &Q) -> Option> + pub(crate) fn remove_entry(&self, key: &Q) -> Option> where Arc: Borrow, Q: Hash + Eq + ?Sized, @@ -518,14 +518,14 @@ where } #[inline] - fn remove_entry(&self, key: &Q) -> Option> + fn remove_entry(&self, key: &Q) -> Option> where Arc: Borrow, Q: Hash + Eq + ?Sized, { self.cache .remove_entry(key) - .map(|(key, entry)| KeyValueEntry { key, entry }) + .map(|(key, entry)| KVEntry::new(key, entry)) } fn max_capacity(&self) -> Option { @@ -730,7 +730,7 @@ where for _ in 0..count { match ch.try_recv() { Ok(Upsert(kh, entry)) => self.handle_upsert(kh, entry, ts, deqs, &freq, ws), - Ok(Remove(key, entry)) => Self::handle_remove(deqs, &key, entry, ws), + Ok(Remove(KVEntry { key, entry })) => Self::handle_remove(deqs, &key, entry, ws), Err(_) => break, }; } @@ -1132,7 +1132,7 @@ where is_done, }) = invalidator.task_result() { - for KeyValueEntry { key, entry } in invalidated { + for KVEntry { key, entry } in invalidated { Self::handle_remove(deqs, &key, entry, ws); } if is_done { diff --git a/src/sync/cache.rs b/src/sync/cache.rs index 2297ed35..f7a29b25 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -2,7 +2,7 @@ use super::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, value_initializer::ValueInitializer, - ConcurrentCacheExt, KeyValueEntry, PredicateId, WriteOp, + ConcurrentCacheExt, PredicateId, WriteOp, }; use crate::{sync::value_initializer::InitResult, PredicateError}; @@ -358,8 +358,8 @@ where Arc: Borrow, Q: Hash + Eq + ?Sized, { - if let Some(KeyValueEntry { key, entry }) = self.base.remove_entry(key) { - let op = WriteOp::Remove(key, entry); + if let Some(kv) = self.base.remove_entry(key) { + let op = WriteOp::Remove(kv); let hk = self.base.housekeeper.as_ref(); Self::schedule_write_op(&self.base.write_op_ch, op, hk).expect("Failed to remove"); } diff --git a/src/sync/invalidator.rs b/src/sync/invalidator.rs index cc66bb3c..01346940 100644 --- a/src/sync/invalidator.rs +++ b/src/sync/invalidator.rs @@ -9,7 +9,7 @@ use crate::{ PredicateError, }; -use super::{base_cache::Inner, KeyValueEntry, PredicateId, PredicateIdStr, ValueEntry}; +use super::{base_cache::Inner, KVEntry, PredicateId, PredicateIdStr, ValueEntry}; use parking_lot::{Mutex, RwLock}; use quanta::Instant; @@ -59,12 +59,12 @@ impl KeyDateLite { } pub(crate) struct InvalidationResult { - pub(crate) invalidated: Vec>, + pub(crate) invalidated: Vec>, pub(crate) is_done: bool, } impl InvalidationResult { - fn new(invalidated: Vec>, is_done: bool) -> Self { + fn new(invalidated: Vec>, is_done: bool) -> Self { Self { invalidated, is_done, @@ -399,7 +399,7 @@ where let ts = candidate.timestamp; if Self::apply(&predicates, cache, key, ts) { if let Some(entry) = 
Self::invalidate(cache, key, ts) { - invalidated.push(KeyValueEntry { + invalidated.push(KVEntry { key: Arc::clone(key), entry, }) @@ -450,7 +450,7 @@ where } struct ScanResult { - invalidated: Vec>, + invalidated: Vec>, is_truncated: bool, newest_timestamp: Option, } From 7e7597256cc89a57c958c09635099a9e3e7d10c3 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 15 Aug 2021 23:20:38 +0800 Subject: [PATCH 12/42] Fix typos in source code comments --- src/future/cache.rs | 4 ++-- src/sync/cache.rs | 2 +- src/sync/segment.rs | 2 +- src/unsync/cache.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/future/cache.rs b/src/future/cache.rs index d9cc8134..494afa7c 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -650,7 +650,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher than to c's. + // because d's frequency is higher than c's. cache.insert("d", "dennis").await; cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); @@ -697,7 +697,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher than to c's. + // because d's frequency is higher than c's. cache.blocking_insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/sync/cache.rs b/src/sync/cache.rs index f7a29b25..d37a6553 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -562,7 +562,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher than to c's. + // because d's frequency is higher than c's. cache.insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/sync/segment.rs b/src/sync/segment.rs index 10ed1974..2068fe73 100644 --- a/src/sync/segment.rs +++ b/src/sync/segment.rs @@ -465,7 +465,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher than to c's. + // because d's frequency is higher than c's. cache.insert("d", "dennis"); cache.sync(); assert_eq!(cache.get(&"a"), Some("alice")); diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index 430445b2..7654f6ad 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -615,7 +615,7 @@ mod tests { assert_eq!(cache.get(&"d"), None); // d -> 2 // "d" should be admitted and "c" should be evicted - // because d's frequency is higher than to c's. + // because d's frequency is higher than c's. cache.insert("d", "dennis"); assert_eq!(cache.get(&"a"), Some(&"alice")); assert_eq!(cache.get(&"b"), Some(&"bob")); From dd354036815a2f56d7feddf23d1b96bfc78b946b Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Mon, 16 Aug 2021 00:48:24 +0800 Subject: [PATCH 13/42] Size-aware cache management - Fix a bug in admit method to go into an infinite loop when the LRU entry has been concurrently invalidated. - Also add a retry limit to admit method. 
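A minimal standalone sketch of the retry-limited scan (the probation queue is modeled
here as a plain slice of optional weights; only the `MAX_CONSECUTIVE_RETRIES` limit of 5
is taken from the diff below, everything else is invented for illustration):

    const MAX_CONSECUTIVE_RETRIES: usize = 5;

    /// Walks potential victims from the LRU end, skipping entries whose value
    /// has been concurrently invalidated (modeled as `None`), always advancing
    /// to the next node and giving up after too many consecutive misses
    /// instead of spinning on the same node forever.
    fn collect_victims(queue: &[Option<u64>], needed_weight: u64) -> Option<Vec<usize>> {
        let mut victims = Vec::new();
        let mut freed = 0u64;
        let mut retries = 0usize;
        for (idx, entry) in queue.iter().enumerate() {
            if freed >= needed_weight {
                break;
            }
            match entry {
                Some(weight) => {
                    freed += weight;
                    victims.push(idx);
                    retries = 0;
                }
                None => {
                    retries += 1;
                    if retries > MAX_CONSECUTIVE_RETRIES {
                        return None;
                    }
                }
            }
        }
        if freed >= needed_weight {
            Some(victims)
        } else {
            None
        }
    }

    fn main() {
        // Two invalidated slots are skipped; the scan still terminates and
        // finds enough weight among the remaining entries.
        let queue = [None, Some(4), None, Some(8)];
        assert_eq!(collect_victims(&queue, 10), Some(vec![1, 3]));
    }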
--- src/sync/base_cache.rs | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 11d069a4..9d854f6c 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -842,32 +842,17 @@ where freq: &FrequencySketch, ws: &WeightedSize<'_, K, V>, ) -> AdmissionResult { + const MAX_CONSECUTIVE_RETRIES: usize = 5; + let mut retries = 0; + let mut victims = EntrySizeAndFrequency::default(); let mut victim_nodes = Vec::default(); let mut skipped_nodes = Vec::default(); - let mut next_victim; - // Find first victim. - loop { - if let Some(victim) = deqs.probation.peek_front() { - if let Some(vic_entry) = cache.get(&victim.element.key) { - victims.add_policy_weight(ws, &victim.element.key, &vic_entry.value); - victims.add_frequency(freq, victim.element.hash); - next_victim = victim.next_node(); - victim_nodes.push(NonNull::from(victim)); - break; - } else { - // Could not get the victim from the cache (hash map). Skip this node - // as its ValueEntry might have been invalidated. - skipped_nodes.push(NonNull::from(victim)); - } - } else { - // No more victims. Reject the candidate. - return AdmissionResult::Rejected { skipped_nodes }; - } - } + // Get first potential victim at the LRU position. + let mut next_victim = deqs.probation.peek_front(); - // Aggregate victims. + // Aggregate potential victims. while victims.weight < candidate.weight { if candidate.freq < victims.freq { break; @@ -879,13 +864,19 @@ where victims.add_policy_weight(ws, &victim.element.key, &vic_entry.value); victims.add_frequency(freq, victim.element.hash); victim_nodes.push(NonNull::from(victim)); + retries = 0; } else { // Could not get the victim from the cache (hash map). Skip this node // as its ValueEntry might have been invalidated. skipped_nodes.push(NonNull::from(victim)); + + retries += 1; + if retries > MAX_CONSECUTIVE_RETRIES { + break; + } } } else { - // No more victims. + // No more potential victims. break; } } From 45ee2a9802ffb0fb580c2e8e9c6688c66399c7b4 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Mon, 16 Aug 2021 22:52:13 +0800 Subject: [PATCH 14/42] Size-aware cache management - Add `weighter` method to the `CacheBuilder`s. - Add unit tests for `sync::Cache` with weighter closure registered. --- src/common/deque.rs | 45 +++++++++++++++++++++++ src/future/builder.rs | 28 ++++++++++++-- src/future/cache.rs | 15 +++++++- src/sync/base_cache.rs | 5 ++- src/sync/builder.rs | 51 +++++++++++++++++++++----- src/sync/cache.rs | 83 ++++++++++++++++++++++++++++++++++++++++-- src/sync/segment.rs | 1 + 7 files changed, 210 insertions(+), 18 deletions(-) diff --git a/src/common/deque.rs b/src/common/deque.rs index 281224c1..f55f0391 100644 --- a/src/common/deque.rs +++ b/src/common/deque.rs @@ -654,6 +654,51 @@ mod tests { assert!((&mut deque).next().is_none()); } + #[test] + fn next_node() { + let mut deque: Deque = Deque::new(MainProbation); + + let node1 = DeqNode::new(MainProbation, "a".into()); + deque.push_back(Box::new(node1)); + let node2 = DeqNode::new(MainProbation, "b".into()); + let node2_ptr = deque.push_back(Box::new(node2)); + let node3 = DeqNode::new(MainProbation, "c".into()); + let node3_ptr = deque.push_back(Box::new(node3)); + + // ------------------------------------------------------- + // First iteration. 
+ // peek_front() -> node1 + let node1a = deque.peek_front().unwrap(); + assert_eq!(node1a.element, "a".to_string()); + let node2a = node1a.next_node().unwrap(); + assert_eq!(node2a.element, "b".to_string()); + let node3a = node2a.next_node().unwrap(); + assert_eq!(node3a.element, "c".to_string()); + assert!(node3a.next_node().is_none()); + + // ------------------------------------------------------- + // Iterate after a move_to_back. + // Move "b" to the back. So now "a" -> "c" -> "b". + unsafe { deque.move_to_back(node2_ptr) }; + let node1a = deque.peek_front().unwrap(); + assert_eq!(node1a.element, "a".to_string()); + let node3a = node1a.next_node().unwrap(); + assert_eq!(node3a.element, "c".to_string()); + let node2a = node3a.next_node().unwrap(); + assert_eq!(node2a.element, "b".to_string()); + assert!(node2a.next_node().is_none()); + + // ------------------------------------------------------- + // Iterate after an unlink. + // Unlink the second node "c". Now "a" -> "c". + unsafe { deque.unlink(node3_ptr) }; + let node1a = deque.peek_front().unwrap(); + assert_eq!(node1a.element, "a".to_string()); + let node2a = node1a.next_node().unwrap(); + assert_eq!(node2a.element, "b".to_string()); + assert!(node2a.next_node().is_none()); + } + #[test] fn drop() { use std::{cell::RefCell, rc::Rc}; diff --git a/src/future/builder.rs b/src/future/builder.rs index 20653175..644db156 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -1,3 +1,5 @@ +use crate::common::Weighter; + use super::Cache; use std::{ @@ -36,16 +38,17 @@ use std::{ /// // after 30 minutes (TTL) from the insert(). /// ``` /// -pub struct CacheBuilder { +pub struct CacheBuilder { max_capacity: Option, initial_capacity: Option, + weighter: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, cache_type: PhantomData, } -impl CacheBuilder> +impl CacheBuilder> where K: Eq + Hash + Send + Sync + 'static, V: Clone + Send + Sync + 'static, @@ -54,6 +57,7 @@ where Self { max_capacity: None, initial_capacity: None, + weighter: None, time_to_live: None, time_to_idle: None, invalidator_enabled: false, @@ -77,6 +81,7 @@ where self.max_capacity, self.initial_capacity, build_hasher, + self.weighter, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -92,6 +97,7 @@ where self.max_capacity, self.initial_capacity, hasher, + self.weighter, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -99,7 +105,15 @@ where } } -impl CacheBuilder { +impl CacheBuilder { + /// Sets the max capacity of the cache. + pub fn max_capacity(self, max_capacity: usize) -> Self { + Self { + max_capacity: Some(max_capacity), + ..self + } + } + /// Sets the initial capacity of the cache. pub fn initial_capacity(self, capacity: usize) -> Self { Self { @@ -108,6 +122,14 @@ impl CacheBuilder { } } + /// Sets the weighter closure of the cache. + pub fn weighter(self, weighter: Weighter) -> Self { + Self { + weighter: Some(weighter), + ..self + } + } + /// Sets the time to live of the cache. 
/// /// A cached entry will be expired after the specified duration past from diff --git a/src/future/cache.rs b/src/future/cache.rs index 494afa7c..e5d08609 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -3,6 +3,7 @@ use super::{ CacheBuilder, ConcurrentCacheExt, }; use crate::{ + common::Weighter, sync::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, @@ -223,10 +224,18 @@ where /// [builder-struct]: ./struct.CacheBuilder.html pub fn new(max_capacity: usize) -> Self { let build_hasher = RandomState::default(); - Self::with_everything(Some(max_capacity), None, build_hasher, None, None, false) + Self::with_everything( + Some(max_capacity), + None, + build_hasher, + None, + None, + None, + false, + ) } - pub fn builder() -> CacheBuilder> { + pub fn builder() -> CacheBuilder> { CacheBuilder::unbound() } } @@ -241,6 +250,7 @@ where max_capacity: Option, initial_capacity: Option, build_hasher: S, + weighter: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -250,6 +260,7 @@ where max_capacity, initial_capacity, build_hasher.clone(), + weighter, time_to_live, time_to_idle, invalidator_enabled, diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 9d854f6c..64b6a34e 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -86,6 +86,7 @@ where max_capacity: Option, initial_capacity: Option, build_hasher: S, + weighter: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -96,6 +97,7 @@ where max_capacity, initial_capacity, build_hasher, + weighter, r_rcv, w_rcv, time_to_live, @@ -454,6 +456,7 @@ where max_capacity: Option, initial_capacity: Option, build_hasher: S, + weighter: Option>, read_op_ch: Receiver>, write_op_ch: Receiver>, time_to_live: Option, @@ -484,7 +487,7 @@ where time_to_live, time_to_idle, valid_after: AtomicU64::new(0), - weighter: None, + weighter, invalidator_enabled, // When enabled, this field will be set later via the set_invalidator method. invalidator: RwLock::new(None), diff --git a/src/sync/builder.rs b/src/sync/builder.rs index 79e709c9..82901794 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -1,3 +1,5 @@ +use crate::common::Weighter; + use super::{Cache, SegmentedCache}; use std::{ @@ -38,28 +40,28 @@ use std::{ /// // after 30 minutes (TTL) from the insert(). /// ``` /// -pub struct CacheBuilder { +pub struct CacheBuilder { max_capacity: Option, initial_capacity: Option, num_segments: Option, + weighter: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, cache_type: PhantomData, } -impl CacheBuilder> +impl CacheBuilder> where K: Eq + Hash + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - /// Construct a new `CacheBuilder` that will be used to build a `Cache` or - /// `SegmentedCache` holding up to `max_capacity` entries. - pub fn new(max_capacity: usize) -> Self { + pub(crate) fn unbound() -> Self { Self { - max_capacity: Some(max_capacity), + max_capacity: None, initial_capacity: None, num_segments: None, + weighter: None, time_to_live: None, time_to_idle: None, invalidator_enabled: false, @@ -67,18 +69,31 @@ where } } + /// Construct a new `CacheBuilder` that will be used to build a `Cache` or + /// `SegmentedCache` holding up to `max_capacity` entries. + pub fn new(max_capacity: usize) -> Self { + Self { + max_capacity: Some(max_capacity), + ..Self::unbound() + } + } + /// Sets the number of segments of the cache. 
/// /// # Panics /// /// Panics if `num_segments` is less than or equals to 1. - pub fn segments(self, num_segments: usize) -> CacheBuilder> { + pub fn segments( + self, + num_segments: usize, + ) -> CacheBuilder> { assert!(num_segments > 1); CacheBuilder { max_capacity: self.max_capacity, initial_capacity: self.initial_capacity, num_segments: Some(num_segments), + weighter: None, time_to_live: self.time_to_live, time_to_idle: self.time_to_idle, invalidator_enabled: self.invalidator_enabled, @@ -96,6 +111,7 @@ where self.max_capacity, self.initial_capacity, build_hasher, + self.weighter, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -114,6 +130,7 @@ where self.max_capacity, self.initial_capacity, hasher, + self.weighter, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -121,7 +138,7 @@ where } } -impl CacheBuilder> +impl CacheBuilder> where K: Eq + Hash + Send + Sync + 'static, V: Clone + Send + Sync + 'static, @@ -163,7 +180,15 @@ where } } -impl CacheBuilder { +impl CacheBuilder { + /// Sets the max capacity of the cache. + pub fn max_capacity(self, max_capacity: usize) -> Self { + Self { + max_capacity: Some(max_capacity), + ..self + } + } + /// Sets the initial capacity of the cache. pub fn initial_capacity(self, capacity: usize) -> Self { Self { @@ -172,6 +197,14 @@ impl CacheBuilder { } } + /// Sets the weighter closure of the cache. + pub fn weighter(self, weighter: Weighter) -> Self { + Self { + weighter: Some(weighter), + ..self + } + } + /// Sets the time to live of the cache. /// /// A cached entry will be expired after the specified duration past from diff --git a/src/sync/cache.rs b/src/sync/cache.rs index d37a6553..1fa80f98 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -2,9 +2,9 @@ use super::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, value_initializer::ValueInitializer, - ConcurrentCacheExt, PredicateId, WriteOp, + CacheBuilder, ConcurrentCacheExt, PredicateId, WriteOp, }; -use crate::{sync::value_initializer::InitResult, PredicateError}; +use crate::{common::Weighter, sync::value_initializer::InitResult, PredicateError}; use crossbeam_channel::{Sender, TrySendError}; use std::{ @@ -195,7 +195,19 @@ where /// [builder-struct]: ./struct.CacheBuilder.html pub fn new(max_capacity: usize) -> Self { let build_hasher = RandomState::default(); - Self::with_everything(Some(max_capacity), None, build_hasher, None, None, false) + Self::with_everything( + Some(max_capacity), + None, + build_hasher, + None, + None, + None, + false, + ) + } + + pub fn builder() -> CacheBuilder> { + CacheBuilder::unbound() } } @@ -209,6 +221,7 @@ where max_capacity: Option, initial_capacity: Option, build_hasher: S, + weighter: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -218,6 +231,7 @@ where max_capacity, initial_capacity, build_hasher.clone(), + weighter, time_to_live, time_to_idle, invalidator_enabled, @@ -574,6 +588,69 @@ mod tests { assert_eq!(cache.get(&"b"), None); } + #[test] + fn size_aware_admission() { + let weighter = |_k: &&str, v: &(&str, u64)| v.1; + + let alice = ("alice", 10u64); + let bob = ("bob", 15); + let cindy = ("cindy", 5); + let david = ("david", 15); + let dennis = ("dennis", 15); + + let mut cache = Cache::builder() + .max_capacity(31) + .weighter(Box::new(weighter)) + .build(); + cache.reconfigure_for_testing(); + + // Make the cache exterior immutable. 
+ let cache = cache; + + cache.insert("a", alice); + cache.insert("b", bob); + assert_eq!(cache.get(&"a"), Some(alice)); + assert_eq!(cache.get(&"b"), Some(bob)); + cache.sync(); + // order (LRU -> MRU) and counts: a -> 1, b -> 1 + + cache.insert("c", cindy); + assert_eq!(cache.get(&"c"), Some(cindy)); + // order and counts: a -> 1, b -> 1, c -> 1 + cache.sync(); + + assert_eq!(cache.get(&"a"), Some(alice)); + assert_eq!(cache.get(&"b"), Some(bob)); + cache.sync(); + // order and counts: c -> 1, a -> 2, b -> 2 + + // To enter "d" (weight: 15), it needs to evict "c" (w: 5) and "a" (w: 10). + // "d" must have higher count than 3, which is the aggregated count of "a" and "c". + cache.insert("d", david); // count: d -> 0 + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 1 + + cache.insert("d", david); + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 2 + + cache.insert("d", david); + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 3 + + cache.insert("d", david); + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 4 + + // Finally "d" should be admitted by evicting "c" and "a". + cache.insert("d", dennis); + cache.sync(); + assert_eq!(cache.get(&"a"), None); + assert_eq!(cache.get(&"b"), Some(bob)); + assert_eq!(cache.get(&"c"), None); + assert_eq!(cache.get(&"d"), Some(dennis)); + } + #[test] fn basic_multi_threads() { let num_threads = 4; diff --git a/src/sync/segment.rs b/src/sync/segment.rs index 2068fe73..1e50ba31 100644 --- a/src/sync/segment.rs +++ b/src/sync/segment.rs @@ -382,6 +382,7 @@ where seg_max_capacity, seg_init_capacity, build_hasher.clone(), + None, // TODO time_to_live, time_to_idle, invalidator_enabled, From f133967fbd4549ca121bab86d09e203610e63964 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Tue, 17 Aug 2021 06:56:29 +0800 Subject: [PATCH 15/42] Size-aware cache management Replace Vec in AdmissionResult with SmallVec. 
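The lists of victim and skipped nodes gathered during admission are almost always very small, so `SmallVec` keeps them in an inline, stack-allocated buffer and only touches the heap when that inline capacity is exceeded. A minimal sketch of the behavior this patch relies on (illustrative only, not moka code; assumes `smallvec = "1"` in Cargo.toml):

```rust
use smallvec::SmallVec;

fn main() {
    // Up to 8 elements are stored inline (no heap allocation), matching the
    // inline capacity chosen for `victim_nodes` in this patch.
    let mut victims: SmallVec<[u32; 8]> = SmallVec::new();
    for i in 0..8 {
        victims.push(i);
    }
    assert!(!victims.spilled()); // still using the inline buffer

    // The ninth element forces a spill to the heap, just like a regular Vec.
    victims.push(8);
    assert!(victims.spilled());
}
```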
--- .vscode/settings.json | 1 + Cargo.toml | 1 + src/sync/base_cache.rs | 13 ++++++++----- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index e407493a..43a301a0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -31,6 +31,7 @@ "rustdoc", "rustfmt", "semver", + "smallvec", "structs", "thiserror", "toolchain", diff --git a/Cargo.toml b/Cargo.toml index eade3309..8b54bc78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ once_cell = "1.7" parking_lot = "0.11" quanta = "0.9" scheduled-thread-pool = "0.2" +smallvec = "1.6" thiserror = "1.0" uuid = { version = "0.8", features = ["v4"] } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 64b6a34e..07ad5c59 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -16,6 +16,7 @@ use crate::{ use crossbeam_channel::{Receiver, Sender, TrySendError}; use parking_lot::{Mutex, RwLock}; use quanta::{Clock, Instant}; +use smallvec::SmallVec; use std::{ borrow::Borrow, collections::hash_map::RandomState, @@ -410,13 +411,15 @@ impl RawTimestamps { } } +type AOQueueNode = NonNull>>; + enum AdmissionResult { Admitted { - victim_nodes: Vec>>>, - skipped_nodes: Vec>>>, + victim_nodes: SmallVec<[AOQueueNode; 8]>, + skipped_nodes: SmallVec<[AOQueueNode; 4]>, }, Rejected { - skipped_nodes: Vec>>>, + skipped_nodes: SmallVec<[AOQueueNode; 4]>, }, } @@ -849,8 +852,8 @@ where let mut retries = 0; let mut victims = EntrySizeAndFrequency::default(); - let mut victim_nodes = Vec::default(); - let mut skipped_nodes = Vec::default(); + let mut victim_nodes = SmallVec::default(); + let mut skipped_nodes = SmallVec::default(); // Get first potential victim at the LRU position. let mut next_victim = deqs.probation.peek_front(); From 8fec64caf1fdfe1595ea1f4bea6b3154485b897f Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Tue, 17 Aug 2021 07:00:59 +0800 Subject: [PATCH 16/42] Size-aware cache management Copy a unit test size_aware_admission from sync::Cache to future::Cache. --- src/future/cache.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++ src/sync/cache.rs | 3 ++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/future/cache.rs b/src/future/cache.rs index e5d08609..81df4ca4 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -720,6 +720,70 @@ mod tests { assert_eq!(cache.get(&"b"), None); } + #[tokio::test] + async fn size_aware_admission() { + let weighter = |_k: &&str, v: &(&str, u64)| v.1; + + let alice = ("alice", 10u64); + let bob = ("bob", 15); + let cindy = ("cindy", 5); + let david = ("david", 15); + let dennis = ("dennis", 15); + + let mut cache = Cache::builder() + .max_capacity(31) + .weighter(Box::new(weighter)) + .build(); + cache.reconfigure_for_testing(); + + // Make the cache exterior immutable. + let cache = cache; + + cache.insert("a", alice).await; + cache.insert("b", bob).await; + assert_eq!(cache.get(&"a"), Some(alice)); + assert_eq!(cache.get(&"b"), Some(bob)); + cache.sync(); + // order (LRU -> MRU) and counts: a -> 1, b -> 1 + + cache.insert("c", cindy).await; + assert_eq!(cache.get(&"c"), Some(cindy)); + // order and counts: a -> 1, b -> 1, c -> 1 + cache.sync(); + + assert_eq!(cache.get(&"a"), Some(alice)); + assert_eq!(cache.get(&"b"), Some(bob)); + cache.sync(); + // order and counts: c -> 1, a -> 2, b -> 2 + + // To enter "d" (weight: 15), it needs to evict "c" (w: 5) and "a" (w: 10). + // "d" must have higher count than 3, which is the aggregated count + // of "a" and "c". 
+ cache.insert("d", david).await; // count: d -> 0 + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 1 + + cache.insert("d", david).await; + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 2 + + cache.insert("d", david).await; + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 3 + + cache.insert("d", david).await; + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 4 + + // Finally "d" should be admitted by evicting "c" and "a". + cache.insert("d", dennis).await; + cache.sync(); + assert_eq!(cache.get(&"a"), None); + assert_eq!(cache.get(&"b"), Some(bob)); + assert_eq!(cache.get(&"c"), None); + assert_eq!(cache.get(&"d"), Some(dennis)); + } + #[tokio::test] async fn basic_multi_async_tasks() { let num_threads = 4; diff --git a/src/sync/cache.rs b/src/sync/cache.rs index 1fa80f98..e991410c 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -625,7 +625,8 @@ mod tests { // order and counts: c -> 1, a -> 2, b -> 2 // To enter "d" (weight: 15), it needs to evict "c" (w: 5) and "a" (w: 10). - // "d" must have higher count than 3, which is the aggregated count of "a" and "c". + // "d" must have higher count than 3, which is the aggregated count + // of "a" and "c". cache.insert("d", david); // count: d -> 0 cache.sync(); assert_eq!(cache.get(&"d"), None); // d -> 1 From c24ce646bff47ec2cd9494fb978f561687100f55 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Wed, 25 Aug 2021 20:25:27 +0800 Subject: [PATCH 17/42] Rename an internal type --- src/sync/base_cache.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 07ad5c59..bc888d34 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -411,15 +411,16 @@ impl RawTimestamps { } } -type AOQueueNode = NonNull>>; +// Access-Order Queue Node +type AoqNode = NonNull>>; enum AdmissionResult { Admitted { - victim_nodes: SmallVec<[AOQueueNode; 8]>, - skipped_nodes: SmallVec<[AOQueueNode; 4]>, + victim_nodes: SmallVec<[AoqNode; 8]>, + skipped_nodes: SmallVec<[AoqNode; 4]>, }, Rejected { - skipped_nodes: SmallVec<[AOQueueNode; 4]>, + skipped_nodes: SmallVec<[AoqNode; 4]>, }, } From 7e01c986d5bd425f5b3b97f1d3d793d82e0ea322 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Wed, 25 Aug 2021 20:25:41 +0800 Subject: [PATCH 18/42] Update the change log --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02ca0dc8..9f31fe78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## Version 0.6.0 (Unreleased) +### Added + +- Add support for size aware admission policy. ([#24][gh-pull-0024]) +- Add support for unbound cache. 
([#24][gh-pull-0024]) + ### Changed - Change `get_or_try_insert_with` to return a concrete error type rather @@ -89,6 +94,7 @@ [caffeine-git]: https://github.com/ben-manes/caffeine +[gh-pull-0024]: https://github.com/moka-rs/moka/pull/24/ [gh-pull-0023]: https://github.com/moka-rs/moka/pull/23/ [gh-pull-0022]: https://github.com/moka-rs/moka/pull/22/ [gh-pull-0020]: https://github.com/moka-rs/moka/pull/20/ From 6dba09b2eb880132f05bcc83f78aca5ce78ef5a5 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Wed, 25 Aug 2021 20:38:19 +0800 Subject: [PATCH 19/42] Fix typos (weigher) --- src/common.rs | 2 +- src/future/builder.rs | 16 ++++++++-------- src/future/cache.rs | 10 +++++----- src/sync/base_cache.rs | 18 +++++++++--------- src/sync/builder.rs | 18 +++++++++--------- src/sync/cache.rs | 10 +++++----- 6 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/common.rs b/src/common.rs index b7279bc0..56d5f81a 100644 --- a/src/common.rs +++ b/src/common.rs @@ -6,7 +6,7 @@ pub(crate) mod frequency_sketch; pub(crate) mod thread_pool; pub(crate) mod unsafe_weak_pointer; -pub(crate) type Weighter = Box u64 + Send + Sync + 'static>; +pub(crate) type Weigher = Box u64 + Send + Sync + 'static>; pub(crate) trait AccessTime { fn last_accessed(&self) -> Option; diff --git a/src/future/builder.rs b/src/future/builder.rs index 644db156..1f039969 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -1,4 +1,4 @@ -use crate::common::Weighter; +use crate::common::Weigher; use super::Cache; @@ -41,7 +41,7 @@ use std::{ pub struct CacheBuilder { max_capacity: Option, initial_capacity: Option, - weighter: Option>, + weigher: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -57,7 +57,7 @@ where Self { max_capacity: None, initial_capacity: None, - weighter: None, + weigher: None, time_to_live: None, time_to_idle: None, invalidator_enabled: false, @@ -81,7 +81,7 @@ where self.max_capacity, self.initial_capacity, build_hasher, - self.weighter, + self.weigher, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -97,7 +97,7 @@ where self.max_capacity, self.initial_capacity, hasher, - self.weighter, + self.weigher, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -122,10 +122,10 @@ impl CacheBuilder { } } - /// Sets the weighter closure of the cache. - pub fn weighter(self, weighter: Weighter) -> Self { + /// Sets the weigher closure of the cache. 
+ pub fn weigher(self, weigher: Weigher) -> Self { Self { - weighter: Some(weighter), + weigher: Some(weigher), ..self } } diff --git a/src/future/cache.rs b/src/future/cache.rs index 81df4ca4..e51cd593 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -3,7 +3,7 @@ use super::{ CacheBuilder, ConcurrentCacheExt, }; use crate::{ - common::Weighter, + common::Weigher, sync::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, @@ -250,7 +250,7 @@ where max_capacity: Option, initial_capacity: Option, build_hasher: S, - weighter: Option>, + weigher: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -260,7 +260,7 @@ where max_capacity, initial_capacity, build_hasher.clone(), - weighter, + weigher, time_to_live, time_to_idle, invalidator_enabled, @@ -722,7 +722,7 @@ mod tests { #[tokio::test] async fn size_aware_admission() { - let weighter = |_k: &&str, v: &(&str, u64)| v.1; + let weigher = |_k: &&str, v: &(&str, u64)| v.1; let alice = ("alice", 10u64); let bob = ("bob", 15); @@ -732,7 +732,7 @@ mod tests { let mut cache = Cache::builder() .max_capacity(31) - .weighter(Box::new(weighter)) + .weigher(Box::new(weigher)) .build(); cache.reconfigure_for_testing(); diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index bc888d34..2aeca39e 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -8,7 +8,7 @@ use crate::{ common::{ deque::{CacheRegion, DeqNode, Deque}, frequency_sketch::FrequencySketch, - AccessTime, Weighter, + AccessTime, Weigher, }, PredicateError, }; @@ -87,7 +87,7 @@ where max_capacity: Option, initial_capacity: Option, build_hasher: S, - weighter: Option>, + weigher: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -98,7 +98,7 @@ where max_capacity, initial_capacity, build_hasher, - weighter, + weigher, r_rcv, w_rcv, time_to_live, @@ -351,13 +351,13 @@ where struct WeightedSize<'a, K, V> { size: u64, - weighter: Option<&'a Weighter>, + weigher: Option<&'a Weigher>, } impl<'a, K, V> WeightedSize<'a, K, V> { #[inline] fn weigh(&self, key: &K, value: &V) -> u64 { - self.weighter.map(|w| w(key, value)).unwrap_or(1) + self.weigher.map(|w| w(key, value)).unwrap_or(1) } #[inline] @@ -440,7 +440,7 @@ pub(crate) struct Inner { time_to_live: Option, time_to_idle: Option, valid_after: AtomicU64, - weighter: Option>, + weigher: Option>, invalidator_enabled: bool, invalidator: RwLock>>, has_expiration_clock: AtomicBool, @@ -460,7 +460,7 @@ where max_capacity: Option, initial_capacity: Option, build_hasher: S, - weighter: Option>, + weigher: Option>, read_op_ch: Receiver>, write_op_ch: Receiver>, time_to_live: Option, @@ -491,7 +491,7 @@ where time_to_live, time_to_idle, valid_after: AtomicU64::new(0), - weighter, + weigher, invalidator_enabled, // When enabled, this field will be set later via the set_invalidator method. 
invalidator: RwLock::new(None), @@ -647,7 +647,7 @@ where let current_ws = self.weighted_size.load(Ordering::Acquire); let mut ws = WeightedSize { size: current_ws, - weighter: self.weighter.as_ref(), + weigher: self.weigher.as_ref(), }; while should_sync && calls <= max_repeats { diff --git a/src/sync/builder.rs b/src/sync/builder.rs index 82901794..a272825c 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -1,4 +1,4 @@ -use crate::common::Weighter; +use crate::common::Weigher; use super::{Cache, SegmentedCache}; @@ -44,7 +44,7 @@ pub struct CacheBuilder { max_capacity: Option, initial_capacity: Option, num_segments: Option, - weighter: Option>, + weigher: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -61,7 +61,7 @@ where max_capacity: None, initial_capacity: None, num_segments: None, - weighter: None, + weigher: None, time_to_live: None, time_to_idle: None, invalidator_enabled: false, @@ -93,7 +93,7 @@ where max_capacity: self.max_capacity, initial_capacity: self.initial_capacity, num_segments: Some(num_segments), - weighter: None, + weigher: None, time_to_live: self.time_to_live, time_to_idle: self.time_to_idle, invalidator_enabled: self.invalidator_enabled, @@ -111,7 +111,7 @@ where self.max_capacity, self.initial_capacity, build_hasher, - self.weighter, + self.weigher, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -130,7 +130,7 @@ where self.max_capacity, self.initial_capacity, hasher, - self.weighter, + self.weigher, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -197,10 +197,10 @@ impl CacheBuilder { } } - /// Sets the weighter closure of the cache. - pub fn weighter(self, weighter: Weighter) -> Self { + /// Sets the weigher closure of the cache. + pub fn weigher(self, weigher: Weigher) -> Self { Self { - weighter: Some(weighter), + weigher: Some(weigher), ..self } } diff --git a/src/sync/cache.rs b/src/sync/cache.rs index e991410c..b14ab6f8 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -4,7 +4,7 @@ use super::{ value_initializer::ValueInitializer, CacheBuilder, ConcurrentCacheExt, PredicateId, WriteOp, }; -use crate::{common::Weighter, sync::value_initializer::InitResult, PredicateError}; +use crate::{common::Weigher, sync::value_initializer::InitResult, PredicateError}; use crossbeam_channel::{Sender, TrySendError}; use std::{ @@ -221,7 +221,7 @@ where max_capacity: Option, initial_capacity: Option, build_hasher: S, - weighter: Option>, + weigher: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -231,7 +231,7 @@ where max_capacity, initial_capacity, build_hasher.clone(), - weighter, + weigher, time_to_live, time_to_idle, invalidator_enabled, @@ -590,7 +590,7 @@ mod tests { #[test] fn size_aware_admission() { - let weighter = |_k: &&str, v: &(&str, u64)| v.1; + let weigher = |_k: &&str, v: &(&str, u64)| v.1; let alice = ("alice", 10u64); let bob = ("bob", 15); @@ -600,7 +600,7 @@ mod tests { let mut cache = Cache::builder() .max_capacity(31) - .weighter(Box::new(weighter)) + .weigher(Box::new(weigher)) .build(); cache.reconfigure_for_testing(); From 5de5924611e1706d64ab24508f29c21771c54e72 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Thu, 26 Aug 2021 00:20:29 +0800 Subject: [PATCH 20/42] Size-aware cache management - Write docs. - Change the weigher type in the CacheBuilder::wegher method to take an impl trait rather than a trait object. 
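With `weigher` taking `impl Fn(&K, &V) -> u64 + Send + Sync + 'static` instead of the boxed `Weigher<K, V>` type, callers can pass a plain closure and the builder boxes it internally. A sketch of the resulting builder usage, mirroring the README example added in this patch:

```rust
use moka::sync::Cache;

fn main() {
    // Before this patch the closure had to be wrapped explicitly:
    //     .weigher(Box::new(|_key, value: &String| value.len() as u64))
    // Now a bare closure is enough.
    let cache = Cache::builder()
        // Bound the cache by total weighted size (here: bytes of the values).
        .max_capacity(32 * 1024 * 1024)
        // The closure takes &K and &V and returns the relative size as u64.
        .weigher(|_key, value: &String| -> u64 { value.len() as u64 })
        .build();

    cache.insert(0, "zero".to_string());
    assert!(cache.get(&0).is_some());
}
```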
--- README.md | 35 ++++++++-- src/future/builder.rs | 57 ++++++++++------ src/future/cache.rs | 148 ++++++++++++++++++++++++++++++++---------- src/lib.rs | 7 +- src/sync/builder.rs | 25 ++++--- src/sync/cache.rs | 128 ++++++++++++++++++++++++++---------- 6 files changed, 296 insertions(+), 104 deletions(-) diff --git a/README.md b/README.md index e2b60e44..60a30175 100644 --- a/README.md +++ b/README.md @@ -41,11 +41,12 @@ exceeded. ## Features - Thread-safe, highly concurrent in-memory cache implementations: - - Blocking caches that can be shared across OS threads. + - Synchronous caches that can be shared across OS threads. - An asynchronous (futures aware) cache that can be accessed inside and outside of asynchronous contexts. -- A not thread-safe, in-memory cache implementation for single thread applications. -- Caches are bounded by the maximum number of entries. +- A cache can be bounded by one of the followings: + - The maximum number of entries. + - The total weighted size of entries. - Maintains good hit rate by using an entry replacement algorithms inspired by [Caffeine][caffeine-git]: - Admission to a cache is controlled by the Least Frequently Used (LFU) policy. @@ -255,6 +256,29 @@ cache.get(&key); ``` +## Example: Bounding a Cache with Weighted Size of Entry + +A `weigher` closure can be set at the cache creation time. It will calculate and +return a weighted size (relative size) of an entry. When it is set, a cache tiers to +evict entries when the total weighted size exceeds its `max_capacity`. + +```rust +use moka::sync::Cache; + +fn main() { + // Evict based on the byte length of strings in the cache. + let cache = Cache::builder() + // Up to 32MiB instead of 3M entries because this cache is going to have + // a weigher. + .max_capacity(32 * 1024 * 1024) + // A weigher closure takes &K and &V and returns a u64 representing the + // relative size of the entry. + .weigher(|_key, value: &String| -> u64 { value.len() as u64 }) + .build(); + cache.insert(0, "zero".to_string()); +} +``` + ## Example: Expiration Policies Moka supports the following expiration policies: @@ -267,12 +291,11 @@ Moka supports the following expiration policies: To set them, use the `CacheBuilder`. ```rust -use moka::sync::CacheBuilder; - +use moka::sync::Cache; use std::time::Duration; fn main() { - let cache = CacheBuilder::new(10_000) // Max 10,000 elements + let cache = Cache::builder() // Time to live (TTL): 30 minutes .time_to_live(Duration::from_secs(30 * 60)) // Time to idle (TTI): 5 minutes diff --git a/src/future/builder.rs b/src/future/builder.rs index 1f039969..b713106a 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -16,26 +16,37 @@ use std::{ /// # Examples /// /// ```rust -/// use moka::future::CacheBuilder; +/// // Cargo.toml +/// // +/// // [dependencies] +/// // moka = { version = "0.6", features = ["future"] } +/// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } +/// // futures = "0.3" /// +/// use moka::future::Cache; /// use std::time::Duration; /// -/// let cache = CacheBuilder::new(10_000) // Max 10,000 elements -/// // Time to live (TTL): 30 minutes -/// .time_to_live(Duration::from_secs(30 * 60)) -/// // Time to idle (TTI): 5 minutes -/// .time_to_idle(Duration::from_secs( 5 * 60)) -/// // Create the cache. 
-/// .build(); +/// #[tokio::main] +/// async fn main() { +/// let cache = Cache::builder() +/// // Max 10,000 entries +/// .max_capacity(10_000) +/// // Time to live (TTL): 30 minutes +/// .time_to_live(Duration::from_secs(30 * 60)) +/// // Time to idle (TTI): 5 minutes +/// .time_to_idle(Duration::from_secs( 5 * 60)) +/// // Create the cache. +/// .build(); /// -/// // This entry will expire after 5 minutes (TTI) if there is no get(). -/// cache.insert(0, "zero"); +/// // This entry will expire after 5 minutes (TTI) if there is no get(). +/// cache.insert(0, "zero").await; /// -/// // This get() will extend the entry life for another 5 minutes. -/// cache.get(&0); +/// // This get() will extend the entry life for another 5 minutes. +/// cache.get(&0); /// -/// // Even though we keep calling get(), the entry will expire -/// // after 30 minutes (TTL) from the insert(). +/// // Even though we keep calling get(), the entry will expire +/// // after 30 minutes (TTL) from the insert(). +/// } /// ``` /// pub struct CacheBuilder { @@ -48,12 +59,12 @@ pub struct CacheBuilder { cache_type: PhantomData, } -impl CacheBuilder> +impl Default for CacheBuilder> where K: Eq + Hash + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - pub(crate) fn unbound() -> Self { + fn default() -> Self { Self { max_capacity: None, initial_capacity: None, @@ -61,16 +72,22 @@ where time_to_live: None, time_to_idle: None, invalidator_enabled: false, - cache_type: PhantomData::default(), + cache_type: Default::default(), } } +} +impl CacheBuilder> +where + K: Eq + Hash + Send + Sync + 'static, + V: Clone + Send + Sync + 'static, +{ /// Construct a new `CacheBuilder` that will be used to build a `Cache` holding /// up to `max_capacity` entries. pub fn new(max_capacity: usize) -> Self { Self { max_capacity: Some(max_capacity), - ..Self::unbound() + ..Default::default() } } @@ -123,9 +140,9 @@ impl CacheBuilder { } /// Sets the weigher closure of the cache. - pub fn weigher(self, weigher: Weigher) -> Self { + pub fn weigher(self, weigher: impl Fn(&K, &V) -> u64 + Send + Sync + 'static) -> Self { Self { - weigher: Some(weigher), + weigher: Some(Box::new(weigher)), ..self } } diff --git a/src/future/cache.rs b/src/future/cache.rs index e51cd593..c130708b 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -118,29 +118,6 @@ use std::{ /// } /// ``` /// -/// # Thread Safety -/// -/// All methods provided by the `Cache` are considered thread-safe, and can be safely -/// accessed by multiple concurrent threads. -/// -/// - `Cache` requires trait bounds `Send`, `Sync` and `'static` for `K` -/// (key), `V` (value) and `S` (hasher state). -/// - `Cache` will implement `Send` and `Sync`. -/// -/// # Sharing a cache across asynchronous tasks -/// -/// To share a cache across async tasks (or OS threads), do one of the followings: -/// -/// - Create a clone of the cache by calling its `clone` method and pass it to other -/// task. -/// - Wrap the cache by a `sync::OnceCell` or `sync::Lazy` from -/// [once_cell][once-cell-crate] create, and set it to a `static` variable. -/// -/// Cloning is a cheap operation for `Cache` as it only creates thread-safe -/// reference-counted pointers to the internal data structures. -/// -/// [once-cell-crate]: https://crates.io/crates/once_cell -/// /// # Avoiding to clone the value at `get` /// /// The return type of `get` method is `Option` instead of `Option<&V>`. 
Every @@ -156,7 +133,65 @@ use std::{ /// /// [rustdoc-std-arc]: https://doc.rust-lang.org/stable/std/sync/struct.Arc.html /// -/// # Expiration Policies +/// # Evictions +/// +/// `Cache` provides two types of eviction: size-based eviction and time-based +/// eviction. +/// +/// ## Size-based +/// +/// ```rust +/// // Cargo.toml +/// // +/// // [dependencies] +/// // moka = { version = "0.6", features = ["future"] } +/// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } +/// // futures = "0.3" +/// +/// use moka::future::Cache; +/// +/// #[tokio::main] +/// async fn main() { +/// // Evict based on the number of entries in the cache. +/// let cache = Cache::builder() +/// // Up to 10,000 entries. +/// .max_capacity(10_000) +/// // Create the cache. +/// .build(); +/// cache.insert(1, "one".to_string()).await; +/// +/// // Evict based on the byte length of strings in the cache. +/// let cache = Cache::builder() +/// // Up to 32MiB instead of 3M entries because this cache is going to have +/// // a weigher. +/// .max_capacity(32 * 1024 * 1024) +/// // A weigher closure takes &K and &V and returns a u64 representing the +/// // relative size of the entry. +/// .weigher(|_key, value: &String| -> u64 { value.len() as u64 }) +/// .build(); +/// cache.insert(2, "two".to_string()).await; +/// } +/// ``` +/// +/// If your cache should not grow beyond a certain size, use the `max_capacity` +/// method of the [`CacheBuilder`][builder-struct] to set the upper bound. The cache +/// will try to evict entries that have not been used recently or very often. +/// +/// At the cache creation time, a weigher closure can be set by the `weigher` method +/// of the `CacheBuilder`. A weigher closure takes `&K` and `&V` as the arguments and +/// returns a `u64` representing the relative size of the entry: +/// +/// - If the `weigher` is _not_ set, the cache will treat each entry has the same +/// size of `1`. This means the cache will be bounded by the number of entries. +/// - If the `weigher` is set, the cache will call the weigher to calculate the +/// weighted size (relative size) on an entry. This means the cache will be bounded +/// by the total weighted size of entries. +/// +/// Note that weighted sizes are not used when making eviction selections. +/// +/// [builder-struct]: ./struct.CacheBuilder.html +/// +/// ## Time-based (Expirations) /// /// `Cache` supports the following expiration policies: /// @@ -165,10 +200,60 @@ use std::{ /// - **Time to idle**: A cached entry will be expired after the specified duration /// past from `get` or `insert`. /// -/// See the [`CacheBuilder`][builder-struct]'s doc for how to configure a cache -/// with them. +/// ```rust +/// // Cargo.toml +/// // +/// // [dependencies] +/// // moka = { version = "0.6", features = ["future"] } +/// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } +/// // futures = "0.3" /// -/// [builder-struct]: ./struct.CacheBuilder.html +/// use moka::future::Cache; +/// use std::time::Duration; +/// +/// #[tokio::main] +/// async fn main() { +/// let cache = Cache::builder() +/// // Time to live (TTL): 30 minutes +/// .time_to_live(Duration::from_secs(30 * 60)) +/// // Time to idle (TTI): 5 minutes +/// .time_to_idle(Duration::from_secs( 5 * 60)) +/// // Create the cache. +/// .build(); +/// +/// // This entry will expire after 5 minutes (TTI) if there is no get(). +/// cache.insert(0, "zero").await; +/// +/// // This get() will extend the entry life for another 5 minutes. 
+/// cache.get(&0); +/// +/// // Even though we keep calling get(), the entry will expire +/// // after 30 minutes (TTL) from the insert(). +/// } +/// ``` +/// +/// # Thread Safety +/// +/// All methods provided by the `Cache` are considered thread-safe, and can be safely +/// accessed by multiple concurrent threads. +/// +/// - `Cache` requires trait bounds `Send`, `Sync` and `'static` for `K` +/// (key), `V` (value) and `S` (hasher state). +/// - `Cache` will implement `Send` and `Sync`. +/// +/// # Sharing a cache across asynchronous tasks +/// +/// To share a cache across async tasks (or OS threads), do one of the followings: +/// +/// - Create a clone of the cache by calling its `clone` method and pass it to other +/// task. +/// - Wrap the cache by a `sync::OnceCell` or `sync::Lazy` from +/// [once_cell][once-cell-crate] create, and set it to a `static` variable. +/// +/// Cloning is a cheap operation for `Cache` as it only creates thread-safe +/// reference-counted pointers to the internal data structures. +/// +/// [once-cell-crate]: https://crates.io/crates/once_cell /// /// # Hashing Algorithm /// @@ -236,7 +321,7 @@ where } pub fn builder() -> CacheBuilder> { - CacheBuilder::unbound() + CacheBuilder::default() } } @@ -721,7 +806,7 @@ mod tests { } #[tokio::test] - async fn size_aware_admission() { + async fn size_aware_eviction() { let weigher = |_k: &&str, v: &(&str, u64)| v.1; let alice = ("alice", 10u64); @@ -730,10 +815,7 @@ mod tests { let david = ("david", 15); let dennis = ("dennis", 15); - let mut cache = Cache::builder() - .max_capacity(31) - .weigher(Box::new(weigher)) - .build(); + let mut cache = Cache::builder().max_capacity(31).weigher(weigher).build(); cache.reconfigure_for_testing(); // Make the cache exterior immutable. diff --git a/src/lib.rs b/src/lib.rs index 28886e1d..3285d64b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,11 +23,12 @@ //! # Features //! //! - Thread-safe, highly concurrent in-memory cache implementations: -//! - Blocking caches that can be shared across OS threads. +//! - Synchronous caches that can be shared across OS threads. //! - An asynchronous (futures aware) cache that can be accessed inside and //! outside of asynchronous contexts. -//! - A not thread-safe, in-memory cache implementation for single thread applications. -//! - Caches are bounded by the maximum number of entries. +//! - A cache can be bounded by one of the followings: +//! - The maximum number of entries. +//! - The total weighted size of entries. //! - Maintains good hit rate by using entry replacement algorithms inspired by //! [Caffeine][caffeine-git]: //! - Admission to a cache is controlled by the Least Frequently Used (LFU) policy. 
diff --git a/src/sync/builder.rs b/src/sync/builder.rs index a272825c..dfae697a 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -18,11 +18,12 @@ use std::{ /// # Examples /// /// ```rust -/// use moka::sync::CacheBuilder; -/// +/// use moka::sync::Cache; /// use std::time::Duration; /// -/// let cache = CacheBuilder::new(10_000) // Max 10,000 elements +/// let cache = Cache::builder() +/// // Max 10,000 entries +/// .max_capacity(10_000) /// // Time to live (TTL): 30 minutes /// .time_to_live(Duration::from_secs(30 * 60)) /// // Time to idle (TTI): 5 minutes @@ -51,12 +52,12 @@ pub struct CacheBuilder { cache_type: PhantomData, } -impl CacheBuilder> +impl Default for CacheBuilder> where K: Eq + Hash + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - pub(crate) fn unbound() -> Self { + fn default() -> Self { Self { max_capacity: None, initial_capacity: None, @@ -65,16 +66,22 @@ where time_to_live: None, time_to_idle: None, invalidator_enabled: false, - cache_type: PhantomData::default(), + cache_type: Default::default(), } } +} +impl CacheBuilder> +where + K: Eq + Hash + Send + Sync + 'static, + V: Clone + Send + Sync + 'static, +{ /// Construct a new `CacheBuilder` that will be used to build a `Cache` or /// `SegmentedCache` holding up to `max_capacity` entries. pub fn new(max_capacity: usize) -> Self { Self { max_capacity: Some(max_capacity), - ..Self::unbound() + ..Default::default() } } @@ -198,9 +205,9 @@ impl CacheBuilder { } /// Sets the weigher closure of the cache. - pub fn weigher(self, weigher: Weigher) -> Self { + pub fn weigher(self, weigher: impl Fn(&K, &V) -> u64 + Send + Sync + 'static) -> Self { Self { - weigher: Some(weigher), + weigher: Some(Box::new(weigher)), ..self } } diff --git a/src/sync/cache.rs b/src/sync/cache.rs index b14ab6f8..1b2817ac 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -89,29 +89,6 @@ use std::{ /// } /// ``` /// -/// # Thread Safety -/// -/// All methods provided by the `Cache` are considered thread-safe, and can be safely -/// accessed by multiple concurrent threads. -/// -/// - `Cache` requires trait bounds `Send`, `Sync` and `'static` for `K` -/// (key), `V` (value) and `S` (hasher state). -/// - `Cache` will implement `Send` and `Sync`. -/// -/// # Sharing a cache across threads -/// -/// To share a cache across threads, do one of the followings: -/// -/// - Create a clone of the cache by calling its `clone` method and pass it to other -/// thread. -/// - Wrap the cache by a `sync::OnceCell` or `sync::Lazy` from -/// [once_cell][once-cell-crate] create, and set it to a `static` variable. -/// -/// Cloning is a cheap operation for `Cache` as it only creates thread-safe -/// reference-counted pointers to the internal data structures. -/// -/// [once-cell-crate]: https://crates.io/crates/once_cell -/// /// # Avoiding to clone the value at `get` /// /// The return type of `get` method is `Option` instead of `Option<&V>`. Every @@ -127,7 +104,55 @@ use std::{ /// /// [rustdoc-std-arc]: https://doc.rust-lang.org/stable/std/sync/struct.Arc.html /// -/// # Expiration Policies +/// # Evictions +/// +/// `Cache` provides two types of eviction: size-based eviction and time-based +/// eviction. +/// +/// ## Size-based +/// +/// ```rust +/// use moka::sync::Cache; +/// +/// // Evict based on the number of entries in the cache. +/// let cache = Cache::builder() +/// // Up to 10,000 entries. +/// .max_capacity(10_000) +/// // Create the cache. 
+/// .build(); +/// cache.insert(1, "one".to_string()); +/// +/// // Evict based on the byte length of strings in the cache. +/// let cache = Cache::builder() +/// // Up to 32MiB instead of 3M entries because this cache is going to have +/// // a weigher. +/// .max_capacity(32 * 1024 * 1024) +/// // A weigher closure takes &K and &V and returns a u64 representing the +/// // relative size of the entry. +/// .weigher(|_key, value: &String| -> u64 { value.len() as u64 }) +/// .build(); +/// cache.insert(2, "two".to_string()); +/// ``` +/// +/// If your cache should not grow beyond a certain size, use the `max_capacity` +/// method of the [`CacheBuilder`][builder-struct] to set the upper bound. The cache +/// will try to evict entries that have not been used recently or very often. +/// +/// At the cache creation time, a weigher closure can be set by the `weigher` method +/// of the `CacheBuilder`. A weigher closure takes `&K` and `&V` as the arguments and +/// returns a `u64` representing the relative size of the entry: +/// +/// - If the `weigher` is _not_ set, the cache will treat each entry has the same +/// size of `1`. This means the cache will be bounded by the number of entries. +/// - If the `weigher` is set, the cache will call the weigher to calculate the +/// weighted size (relative size) on an entry. This means the cache will be bounded +/// by the total weighted size of entries. +/// +/// Note that weighted sizes are not used when making eviction selections. +/// +/// [builder-struct]: ./struct.CacheBuilder.html +/// +/// ## Time-based (Expirations) /// /// `Cache` supports the following expiration policies: /// @@ -136,10 +161,50 @@ use std::{ /// - **Time to idle**: A cached entry will be expired after the specified duration /// past from `get` or `insert`. /// -/// See the [`CacheBuilder`][builder-struct]'s doc for how to configure a cache -/// with them. +/// ```rust +/// use moka::sync::Cache; +/// use std::time::Duration; /// -/// [builder-struct]: ./struct.CacheBuilder.html +/// let cache = Cache::builder() +/// // Time to live (TTL): 30 minutes +/// .time_to_live(Duration::from_secs(30 * 60)) +/// // Time to idle (TTI): 5 minutes +/// .time_to_idle(Duration::from_secs( 5 * 60)) +/// // Create the cache. +/// .build(); +/// +/// // This entry will expire after 5 minutes (TTI) if there is no get(). +/// cache.insert(0, "zero"); +/// +/// // This get() will extend the entry life for another 5 minutes. +/// cache.get(&0); +/// +/// // Even though we keep calling get(), the entry will expire +/// // after 30 minutes (TTL) from the insert(). +/// ``` +/// +/// # Thread Safety +/// +/// All methods provided by the `Cache` are considered thread-safe, and can be safely +/// accessed by multiple concurrent threads. +/// +/// - `Cache` requires trait bounds `Send`, `Sync` and `'static` for `K` +/// (key), `V` (value) and `S` (hasher state). +/// - `Cache` will implement `Send` and `Sync`. +/// +/// # Sharing a cache across threads +/// +/// To share a cache across threads, do one of the followings: +/// +/// - Create a clone of the cache by calling its `clone` method and pass it to other +/// thread. +/// - Wrap the cache by a `sync::OnceCell` or `sync::Lazy` from +/// [once_cell][once-cell-crate] create, and set it to a `static` variable. +/// +/// Cloning is a cheap operation for `Cache` as it only creates thread-safe +/// reference-counted pointers to the internal data structures. 
+/// +/// [once-cell-crate]: https://crates.io/crates/once_cell /// /// # Hashing Algorithm /// @@ -207,7 +272,7 @@ where } pub fn builder() -> CacheBuilder> { - CacheBuilder::unbound() + CacheBuilder::default() } } @@ -589,7 +654,7 @@ mod tests { } #[test] - fn size_aware_admission() { + fn size_aware_eviction() { let weigher = |_k: &&str, v: &(&str, u64)| v.1; let alice = ("alice", 10u64); @@ -598,10 +663,7 @@ mod tests { let david = ("david", 15); let dennis = ("dennis", 15); - let mut cache = Cache::builder() - .max_capacity(31) - .weigher(Box::new(weigher)) - .build(); + let mut cache = Cache::builder().max_capacity(31).weigher(weigher).build(); cache.reconfigure_for_testing(); // Make the cache exterior immutable. From c1d6fbda57f1613afe403b91f4111bd50b58f908 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Thu, 26 Aug 2021 00:29:33 +0800 Subject: [PATCH 21/42] Add cargo clean step to the CI to avoid Skeptic to fail --- .github/workflows/CI.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 406b438b..64e0f711 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -43,6 +43,11 @@ jobs: - uses: Swatinem/rust-cache@v1 + - name: cargo clean + uses: actions-rs/cargo@v1 + with: + command: clean + - name: Build (no features) uses: actions-rs/cargo@v1 with: From b93ad50436a2382b010c7be7a1730f19fdb63c28 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Thu, 26 Aug 2021 00:52:37 +0800 Subject: [PATCH 22/42] Cosmetic changes in the README --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 60a30175..3ec04d41 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,8 @@ Moka provides cache implementations that support full concurrency of retrievals a high expected concurrency for updates. Moka also provides a not thread-safe cache implementation for single thread applications. -All caches perform a best-effort bounding of a hash map using an entry -replacement algorithm to determine which entries to evict when the capacity is -exceeded. +All caches perform a best-effort bounding of a hash map using an entry replacement +algorithm to determine which entries to evict when the capacity is exceeded. [gh-actions-badge]: https://github.com/moka-rs/moka/workflows/CI/badge.svg [release-badge]: https://img.shields.io/crates/v/moka.svg @@ -75,7 +74,7 @@ moka = { version = "0.6", features = ["future"] } ## Example: Synchronous Cache -The thread-safe, blocking caches are defined in the `sync` module. +The thread-safe, synchronous caches are defined in the `sync` module. Cache entries are manually added using `insert` method, and are stored in the cache until either evicted or manually invalidated. From 2c7ca64b6178e10aa72cab931dfe7dccd12e78d5 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 18 Dec 2021 15:58:27 +0800 Subject: [PATCH 23/42] Size-aware cache management - Implement size-aware cache management to the unsync cache. - Rename `KVEntry` to `KvEntry`. 
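The unsync builder gains the same knobs, but its weigher type is `Box<dyn FnMut(&K, &V) -> u64>` with no `Send`/`Sync` bounds, since this cache is only used from a single thread and its methods such as `insert` take `&mut self`. A small usage sketch under those assumptions (illustrative, not taken from the diff):

```rust
use moka::unsync::Cache;

fn main() {
    // Bound the single-thread cache by the byte length of its values
    // rather than by entry count.
    let mut cache: Cache<u32, String> = Cache::builder()
        .max_capacity(1024)
        .weigher(|_key, value: &String| value.len() as u64)
        .build();

    // All operations go through `&mut self`; no locking is involved.
    cache.insert(1, "one".to_string());
    assert!(cache.get(&1).is_some());
}
```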
--- src/common.rs | 2 - src/future/builder.rs | 3 +- src/future/cache.rs | 3 +- src/sync.rs | 8 +- src/sync/base_cache.rs | 30 ++-- src/sync/builder.rs | 4 +- src/sync/cache.rs | 4 +- src/sync/invalidator.rs | 10 +- src/unsync.rs | 2 + src/unsync/builder.rs | 63 ++++++-- src/unsync/cache.rs | 316 +++++++++++++++++++++++++++++++++------- 11 files changed, 352 insertions(+), 93 deletions(-) diff --git a/src/common.rs b/src/common.rs index 56d5f81a..12c4793b 100644 --- a/src/common.rs +++ b/src/common.rs @@ -6,8 +6,6 @@ pub(crate) mod frequency_sketch; pub(crate) mod thread_pool; pub(crate) mod unsafe_weak_pointer; -pub(crate) type Weigher = Box u64 + Send + Sync + 'static>; - pub(crate) trait AccessTime { fn last_accessed(&self) -> Option; fn set_last_accessed(&mut self, timestamp: Instant); diff --git a/src/future/builder.rs b/src/future/builder.rs index b713106a..fc7f2c47 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -1,6 +1,5 @@ -use crate::common::Weigher; - use super::Cache; +use crate::sync::Weigher; use std::{ collections::hash_map::RandomState, diff --git a/src/future/cache.rs b/src/future/cache.rs index c130708b..9669a284 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -3,11 +3,10 @@ use super::{ CacheBuilder, ConcurrentCacheExt, }; use crate::{ - common::Weigher, sync::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, - PredicateId, WriteOp, + PredicateId, Weigher, WriteOp, }, PredicateError, }; diff --git a/src/sync.rs b/src/sync.rs index 373486fb..bacf16bc 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -41,6 +41,8 @@ pub trait ConcurrentCacheExt { fn sync(&self); } +pub(crate) type Weigher = Box u64 + Send + Sync + 'static>; + pub(crate) struct KeyHash { pub(crate) key: Arc, pub(crate) hash: u64, @@ -92,12 +94,12 @@ impl KeyHashDate { } } -pub(crate) struct KVEntry { +pub(crate) struct KvEntry { pub(crate) key: Arc, pub(crate) entry: Arc>, } -impl KVEntry { +impl KvEntry { pub(crate) fn new(key: Arc, entry: Arc>) -> Self { Self { key, entry } } @@ -290,5 +292,5 @@ pub(crate) enum ReadOp { pub(crate) enum WriteOp { Upsert(KeyHash, Arc>), - Remove(KVEntry), + Remove(KvEntry), } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 2aeca39e..621ef8cc 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -2,13 +2,13 @@ use super::{ deques::Deques, housekeeper::{Housekeeper, InnerSync, SyncPace}, invalidator::{GetOrRemoveEntry, InvalidationResult, Invalidator, KeyDateLite, PredicateFun}, - KVEntry, KeyDate, KeyHash, KeyHashDate, PredicateId, ReadOp, ValueEntry, WriteOp, + KeyDate, KeyHash, KeyHashDate, KvEntry, PredicateId, ReadOp, ValueEntry, Weigher, WriteOp, }; use crate::{ common::{ deque::{CacheRegion, DeqNode, Deque}, frequency_sketch::FrequencySketch, - AccessTime, Weigher, + AccessTime, }, PredicateError, }; @@ -65,7 +65,7 @@ impl Clone for BaseCache { inner: Arc::clone(&self.inner), read_op_ch: self.read_op_ch.clone(), write_op_ch: self.write_op_ch.clone(), - housekeeper: self.housekeeper.as_ref().map(|h| Arc::clone(h)), + housekeeper: self.housekeeper.as_ref().map(Arc::clone), } } } @@ -164,7 +164,7 @@ where } #[inline] - pub(crate) fn remove_entry(&self, key: &Q) -> Option> + pub(crate) fn remove_entry(&self, key: &Q) -> Option> where Arc: Borrow, Q: Hash + Eq + ?Sized, @@ -525,14 +525,14 @@ where } #[inline] - fn remove_entry(&self, key: &Q) -> Option> + fn remove_entry(&self, key: &Q) -> Option> where Arc: Borrow, Q: Hash + Eq + ?Sized, { self.cache 
.remove_entry(key) - .map(|(key, entry)| KVEntry::new(key, entry)) + .map(|(key, entry)| KvEntry::new(key, entry)) } fn max_capacity(&self) -> Option { @@ -630,6 +630,12 @@ where } } +// TODO: Divide this method into smaller methods so that unit tests can do more +// precise testing. +// - sync_reads +// - sync_writes +// - evict +// - invalidate_entries impl InnerSync for Inner where K: Hash + Eq + Send + Sync + 'static, @@ -737,7 +743,7 @@ where for _ in 0..count { match ch.try_recv() { Ok(Upsert(kh, entry)) => self.handle_upsert(kh, entry, ts, deqs, &freq, ws), - Ok(Remove(KVEntry { key, entry })) => Self::handle_remove(deqs, &key, entry, ws), + Ok(Remove(KvEntry { key, entry })) => Self::handle_remove(deqs, &key, entry, ws), Err(_) => break, }; } @@ -756,15 +762,17 @@ where entry.set_last_modified(timestamp); let raw_ts = RawTimestamps::new(&entry); + let policy_weight = ws.weigh(&kh.key, &entry.value); + if entry.is_admitted() { + // TODO: Update the total weight. + // The entry has been already admitted, so treat this as an update. deqs.move_to_back_ao(&entry); deqs.move_to_back_wo(&entry); return; } - let policy_weight = ws.weigh(&kh.key, &entry.value); - if self.has_enough_capacity(policy_weight, ws) { // There are enough room in the cache (or the cache is unbounded). // Add the candidate to the deques. @@ -790,6 +798,8 @@ where victim_nodes, skipped_nodes: mut skipped, } => { + // TODO: Try not to recalculate weights in handle_remove and handle_admit. + // Try to remove the victims from the cache (hash map). for victim in victim_nodes { if let Some((vic_key, vic_entry)) = self @@ -1130,7 +1140,7 @@ where is_done, }) = invalidator.task_result() { - for KVEntry { key, entry } in invalidated { + for KvEntry { key, entry } in invalidated { Self::handle_remove(deqs, &key, entry, ws); } if is_done { diff --git a/src/sync/builder.rs b/src/sync/builder.rs index dfae697a..f1341a51 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -1,6 +1,4 @@ -use crate::common::Weigher; - -use super::{Cache, SegmentedCache}; +use super::{Cache, SegmentedCache, Weigher}; use std::{ collections::hash_map::RandomState, diff --git a/src/sync/cache.rs b/src/sync/cache.rs index 1b2817ac..efdff901 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -2,9 +2,9 @@ use super::{ base_cache::{BaseCache, HouseKeeperArc, MAX_SYNC_REPEATS, WRITE_RETRY_INTERVAL_MICROS}, housekeeper::InnerSync, value_initializer::ValueInitializer, - CacheBuilder, ConcurrentCacheExt, PredicateId, WriteOp, + CacheBuilder, ConcurrentCacheExt, PredicateId, Weigher, WriteOp, }; -use crate::{common::Weigher, sync::value_initializer::InitResult, PredicateError}; +use crate::{sync::value_initializer::InitResult, PredicateError}; use crossbeam_channel::{Sender, TrySendError}; use std::{ diff --git a/src/sync/invalidator.rs b/src/sync/invalidator.rs index 01346940..774c0856 100644 --- a/src/sync/invalidator.rs +++ b/src/sync/invalidator.rs @@ -9,7 +9,7 @@ use crate::{ PredicateError, }; -use super::{base_cache::Inner, KVEntry, PredicateId, PredicateIdStr, ValueEntry}; +use super::{base_cache::Inner, KvEntry, PredicateId, PredicateIdStr, ValueEntry}; use parking_lot::{Mutex, RwLock}; use quanta::Instant; @@ -59,12 +59,12 @@ impl KeyDateLite { } pub(crate) struct InvalidationResult { - pub(crate) invalidated: Vec>, + pub(crate) invalidated: Vec>, pub(crate) is_done: bool, } impl InvalidationResult { - fn new(invalidated: Vec>, is_done: bool) -> Self { + fn new(invalidated: Vec>, is_done: bool) -> Self { Self { invalidated, is_done, 
@@ -399,7 +399,7 @@ where let ts = candidate.timestamp; if Self::apply(&predicates, cache, key, ts) { if let Some(entry) = Self::invalidate(cache, key, ts) { - invalidated.push(KVEntry { + invalidated.push(KvEntry { key: Arc::clone(key), entry, }) @@ -450,7 +450,7 @@ where } struct ScanResult { - invalidated: Vec>, + invalidated: Vec>, is_truncated: bool, newest_timestamp: Option, } diff --git a/src/unsync.rs b/src/unsync.rs index b3dd871c..f265607a 100644 --- a/src/unsync.rs +++ b/src/unsync.rs @@ -12,6 +12,8 @@ use quanta::Instant; use crate::common::{deque::DeqNode, AccessTime}; +pub(crate) type Weigher = Box u64>; + pub(crate) struct KeyDate { pub(crate) key: Rc, pub(crate) timestamp: Option, diff --git a/src/unsync/builder.rs b/src/unsync/builder.rs index ef38ef0d..ad98494f 100644 --- a/src/unsync/builder.rs +++ b/src/unsync/builder.rs @@ -1,4 +1,4 @@ -use super::Cache; +use super::{Cache, Weigher}; use std::{ collections::hash_map::RandomState, @@ -14,11 +14,12 @@ use std::{ /// # Examples /// /// ```rust -/// use moka::unsync::CacheBuilder; -/// +/// use moka::unsync::Cache; /// use std::time::Duration; /// -/// let mut cache = CacheBuilder::new(10_000) // Max 10,000 elements +/// let mut cache = Cache::builder() +/// // Max 10,000 elements +/// .max_capacity(10_000) /// // Time to live (TTL): 30 minutes /// .time_to_live(Duration::from_secs(30 * 60)) /// // Time to idle (TTI): 5 minutes @@ -36,27 +37,41 @@ use std::{ /// // after 30 minutes (TTL) from the insert(). /// ``` /// -pub struct CacheBuilder { - max_capacity: usize, +pub struct CacheBuilder { + max_capacity: Option, initial_capacity: Option, + weigher: Option>, time_to_live: Option, time_to_idle: Option, cache_type: PhantomData, } -impl CacheBuilder> +impl Default for CacheBuilder> where K: Eq + Hash, { - /// Construct a new `CacheBuilder` that will be used to build a `Cache` holding - /// up to `max_capacity` entries. - pub fn new(max_capacity: usize) -> Self { + fn default() -> Self { Self { - max_capacity, + max_capacity: None, initial_capacity: None, + weigher: None, time_to_live: None, time_to_idle: None, - cache_type: PhantomData::default(), + cache_type: Default::default(), + } + } +} + +impl CacheBuilder> +where + K: Eq + Hash, +{ + /// Construct a new `CacheBuilder` that will be used to build a `Cache` holding + /// up to `max_capacity` entries. + pub fn new(max_capacity: usize) -> Self { + Self { + max_capacity: Some(max_capacity), + ..Default::default() } } @@ -67,6 +82,7 @@ where self.max_capacity, self.initial_capacity, build_hasher, + self.weigher, self.time_to_live, self.time_to_idle, ) @@ -81,13 +97,22 @@ where self.max_capacity, self.initial_capacity, hasher, + self.weigher, self.time_to_live, self.time_to_idle, ) } } -impl CacheBuilder { +impl CacheBuilder { + /// Sets the max capacity of the cache. + pub fn max_capacity(self, max_capacity: usize) -> Self { + Self { + max_capacity: Some(max_capacity), + ..self + } + } + /// Sets the initial capacity of the cache. pub fn initial_capacity(self, capacity: usize) -> Self { Self { @@ -96,6 +121,14 @@ impl CacheBuilder { } } + /// Sets the weigher closure of the cache. + pub fn weigher(self, weigher: impl FnMut(&K, &V) -> u64 + 'static) -> Self { + Self { + weigher: Some(Box::new(weigher)), + ..self + } + } + /// Sets the time to live of the cache. 
/// /// A cached entry will be expired after the specified duration past from @@ -130,7 +163,7 @@ mod tests { // Cache let mut cache = CacheBuilder::new(100).build(); - assert_eq!(cache.max_capacity(), 100); + assert_eq!(cache.max_capacity(), Some(100)); assert_eq!(cache.time_to_live(), None); assert_eq!(cache.time_to_idle(), None); @@ -142,7 +175,7 @@ mod tests { .time_to_idle(Duration::from_secs(15 * 60)) .build(); - assert_eq!(cache.max_capacity(), 100); + assert_eq!(cache.max_capacity(), Some(100)); assert_eq!(cache.time_to_live(), Some(Duration::from_secs(45 * 60))); assert_eq!(cache.time_to_idle(), Some(Duration::from_secs(15 * 60))); diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index 7654f6ad..b4627427 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -1,4 +1,4 @@ -use super::{deques::Deques, KeyDate, KeyHashDate, ValueEntry}; +use super::{deques::Deques, CacheBuilder, KeyDate, KeyHashDate, ValueEntry, Weigher}; use crate::common::{ deque::{CacheRegion, DeqNode, Deque}, frequency_sketch::FrequencySketch, @@ -6,6 +6,7 @@ use crate::common::{ }; use quanta::{Clock, Instant}; +use smallvec::SmallVec; use std::{ borrow::Borrow, collections::{hash_map::RandomState, HashMap}, @@ -108,9 +109,11 @@ type CacheStore = std::collections::HashMap, ValueEntry, S> /// [ahash-crate]: https://crates.io/crates/ahash /// pub struct Cache { - max_capacity: usize, + max_capacity: Option, + weighted_size: u64, cache: CacheStore, build_hasher: S, + weigher: Option>, deques: Deques, frequency_sketch: FrequencySketch, time_to_live: Option, @@ -130,7 +133,11 @@ where /// [builder-struct]: ./struct.CacheBuilder.html pub fn new(max_capacity: usize) -> Self { let build_hasher = RandomState::default(); - Self::with_everything(max_capacity, None, build_hasher, None, None) + Self::with_everything(Some(max_capacity), None, build_hasher, None, None, None) + } + + pub fn builder() -> CacheBuilder> { + CacheBuilder::default() } } @@ -143,9 +150,10 @@ where S: BuildHasher + Clone, { pub(crate) fn with_everything( - max_capacity: usize, + max_capacity: Option, initial_capacity: Option, build_hasher: S, + weigher: Option>, time_to_live: Option, time_to_idle: Option, ) -> Self { @@ -153,12 +161,14 @@ where initial_capacity.unwrap_or_default(), build_hasher.clone(), ); - let skt_capacity = usize::max(max_capacity * 32, 100); + let skt_capacity = max_capacity.map(|n| n * 32).unwrap_or_default().max(100); let frequency_sketch = FrequencySketch::with_capacity(skt_capacity); Self { - max_capacity, + max_capacity: max_capacity.map(|n| n as u64), + weighted_size: 0, cache, build_hasher, + weigher, deques: Deques::default(), frequency_sketch, time_to_live, @@ -208,14 +218,16 @@ where /// If the cache has this key present, the value is updated. pub fn insert(&mut self, key: K, value: V) { let timestamp = self.evict_if_needed(); + let policy_weight = weigh(&mut self.weigher, &key, &value); let key = Rc::new(key); let entry = ValueEntry::new(value); if let Some(old_entry) = self.cache.insert(Rc::clone(&key), entry) { - self.handle_update(key, timestamp, old_entry); + let old_policy_weight = weigh(&mut self.weigher, &key, &old_entry.value); + self.handle_update(key, timestamp, policy_weight, old_entry, old_policy_weight); } else { let hash = self.hash(&key); - self.handle_insert(key, hash, timestamp); + self.handle_insert(key, hash, policy_weight, timestamp); } } @@ -230,6 +242,7 @@ where { self.evict_if_needed(); + // TODO: Update the weighted_size. 
if let Some(mut entry) = self.cache.remove(key) { self.deques.unlink_ao(&mut entry); Deques::unlink_wo(&mut self.deques.write_order, &mut entry) @@ -244,6 +257,7 @@ where pub fn invalidate_all(&mut self) { self.cache.clear(); self.deques.clear(); + self.weighted_size = 0; } /// Discards cached values that satisfy a predicate. @@ -274,6 +288,7 @@ where .map(|(key, _)| Rc::clone(key)) .collect::>(); + // TODO: Update the weighted_size. keys_to_invalidate.into_iter().for_each(|k| { if let Some(mut entry) = cache.remove(&k) { deques.unlink_ao(&mut entry); @@ -283,8 +298,8 @@ where } /// Returns the `max_capacity` of this cache. - pub fn max_capacity(&self) -> usize { - self.max_capacity + pub fn max_capacity(&self) -> Option { + self.max_capacity.map(|n| n as usize) } /// Returns the `time_to_live` of this cache. @@ -373,85 +388,192 @@ where deques.move_to_back_ao(entry) } + fn has_enough_capacity(&self, candidate_weight: u64, ws: u64) -> bool { + self.max_capacity + .map(|limit| ws + candidate_weight <= limit) + .unwrap_or(true) + } + + fn saturating_add_to_total_weight(&mut self, weight: u64) { + let total = &mut self.weighted_size; + *total = total.saturating_add(weight); + } + + fn saturating_sub_from_total_weight(&mut self, weight: u64) { + let total = &mut self.weighted_size; + *total = total.saturating_sub(weight); + } + #[inline] - fn handle_insert(&mut self, key: Rc, hash: u64, timestamp: Option) { - let has_free_space = self.cache.len() <= self.max_capacity; + fn handle_insert( + &mut self, + key: Rc, + hash: u64, + policy_weight: u64, + timestamp: Option, + ) { + let has_free_space = self.has_enough_capacity(policy_weight, self.weighted_size); let (cache, deqs, freq) = (&mut self.cache, &mut self.deques, &self.frequency_sketch); if has_free_space { // Add the candidate to the deque. let key = Rc::clone(&key); - let mut entry = cache.get_mut(&key).unwrap(); + let entry = cache.get_mut(&key).unwrap(); deqs.push_back_ao( CacheRegion::MainProbation, KeyHashDate::new(Rc::clone(&key), hash, timestamp), - &mut entry, + entry, ); if self.time_to_live.is_some() { - deqs.push_back_wo(KeyDate::new(key, timestamp), &mut entry); + deqs.push_back_wo(KeyDate::new(key, timestamp), entry); } - } else { - let victim = Self::find_cache_victim(deqs, freq); - if Self::admit(hash, victim, freq) { - // Remove the victim from the cache and deque. - // - // TODO: Check if the selected victim was actually removed. If not, - // maybe we should find another victim. This can happen because it - // could have been already removed from the cache but the removal - // from the deque is still on the write operations queue and is not - // yet executed. - if let Some(mut vic_entry) = cache.remove(&victim.element.key) { + self.saturating_add_to_total_weight(policy_weight); + return; + } + + if let Some(max) = self.max_capacity { + if policy_weight > max { + // The candidate is too big to fit in the cache. Reject it. + cache.remove(&Rc::clone(&key)); + return; + } + } + + let mut candidate = EntrySizeAndFrequency::new(policy_weight); + candidate.add_frequency(freq, hash); + + match Self::admit(&candidate, cache, deqs, freq, &mut self.weigher) { + AdmissionResult::Admitted { + victim_nodes, + victims_weight, + } => { + // Remove the victims from the cache (hash map) and deque. + for victim in victim_nodes { + // Remove the victim from the hash map. 
+ let mut vic_entry = cache + .remove(unsafe { &victim.as_ref().element.key }) + .expect("Cannot remove a victim from the hash map"); + // And then remove the victim from the deques. deqs.unlink_ao(&mut vic_entry); Deques::unlink_wo(&mut deqs.write_order, &mut vic_entry); - } else { - let victim = NonNull::from(victim); - deqs.unlink_node_ao(victim); } - // Add the candidate to the deque. - let mut entry = cache.get_mut(&key).unwrap(); + // Add the candidate to the deque. + let entry = cache.get_mut(&key).unwrap(); let key = Rc::clone(&key); deqs.push_back_ao( CacheRegion::MainProbation, KeyHashDate::new(Rc::clone(&key), hash, timestamp), - &mut entry, + entry, ); if self.time_to_live.is_some() { - deqs.push_back_wo(KeyDate::new(key, timestamp), &mut entry); + deqs.push_back_wo(KeyDate::new(key, timestamp), entry); } - } else { + + Self::saturating_sub_from_total_weight(self, victims_weight); + Self::saturating_add_to_total_weight(self, policy_weight); + } + AdmissionResult::Rejected => { // Remove the candidate from the cache. cache.remove(&key); } } } - #[inline] - fn find_cache_victim<'a>( - deqs: &'a mut Deques, - _freq: &FrequencySketch, - ) -> &'a DeqNode> { - // TODO: Check its frequency. If it is not very low, maybe we should - // check frequencies of next few others and pick from them. - deqs.probation.peek_front().expect("No victim found") - } - + // #[inline] + // fn find_cache_victim<'a>( + // deqs: &'a mut Deques, + // _freq: &FrequencySketch, + // ) -> &'a DeqNode> { + // // TODO: Check its frequency. If it is not very low, maybe we should + // // check frequencies of next few others and pick from them. + // deqs.probation.peek_front().expect("No victim found") + // } + + // #[inline] + // fn admit( + // candidate_hash: u64, + // victim: &DeqNode>, + // freq: &FrequencySketch, + // ) -> bool { + // // TODO: Implement some randomness to mitigate hash DoS attack. + // // See Caffeine's implementation. + // freq.frequency(candidate_hash) > freq.frequency(victim.element.hash) + // } + + /// Performs size-aware admission explained in the paper: + /// [Lightweight Robust Size Aware Cache Management][size-aware-cache-paper] + /// by Gil Einziger, Ohad Eytan, Roy Friedman, Ben Manes. + /// + /// [size-aware-cache-paper]: https://arxiv.org/abs/2105.08770 + /// + /// There are some modifications in this implementation: + /// - To admit to the main space, candidate's frequency must be higher than + /// the aggregated frequencies of the potential victims. (In the paper, + /// `>=` operator is used rather than `>`) The `>` operator will do a better + /// job to prevent the main space from polluting. + /// - When a candidate is rejected, the potential victims will stay at the LRU + /// position of the probation access-order queue. (In the paper, they will be + /// promoted (to the MRU position?) to force the eviction policy to select a + /// different set of victims for the next candidate). We may implement the + /// paper's behavior later? + /// #[inline] fn admit( - candidate_hash: u64, - victim: &DeqNode>, + candidate: &EntrySizeAndFrequency, + cache: &CacheStore, + deqs: &Deques, freq: &FrequencySketch, - ) -> bool { + weigher: &mut Option>, + ) -> AdmissionResult { + let mut victims = EntrySizeAndFrequency::default(); + let mut victim_nodes = SmallVec::default(); + + // Get first potential victim at the LRU position. + let mut next_victim = deqs.probation.peek_front(); + + // Aggregate potential victims. 
+ while victims.weight < candidate.weight { + if candidate.freq < victims.freq { + break; + } + if let Some(victim) = next_victim.take() { + next_victim = victim.next_node(); + + let vic_entry = cache + .get(&victim.element.key) + .expect("Cannot get an victim entry"); + victims.add_policy_weight(victim.element.key.as_ref(), &vic_entry.value, weigher); + victims.add_frequency(freq, victim.element.hash); + victim_nodes.push(NonNull::from(victim)); + } else { + // No more potential victims. + break; + } + } + + // Admit or reject the candidate. + // TODO: Implement some randomness to mitigate hash DoS attack. // See Caffeine's implementation. - freq.frequency(candidate_hash) > freq.frequency(victim.element.hash) + + if victims.weight >= candidate.weight && candidate.freq > victims.freq { + AdmissionResult::Admitted { + victim_nodes, + victims_weight: victims.weight, + } + } else { + AdmissionResult::Rejected + } } fn handle_update( &mut self, key: Rc, timestamp: Option, + policy_weight: u64, old_entry: ValueEntry, + old_policy_weight: u64, ) { let entry = self.cache.get_mut(&key).unwrap(); entry.replace_deq_nodes_with(old_entry); @@ -461,7 +583,10 @@ where } let deqs = &mut self.deques; deqs.move_to_back_ao(entry); - deqs.move_to_back_wo(entry) + deqs.move_to_back_wo(entry); + + self.saturating_sub_from_total_weight(old_policy_weight); + self.saturating_add_to_total_weight(policy_weight); } fn evict(&mut self, now: Instant) { @@ -500,6 +625,7 @@ where } } + // TODO: Update the weighted_size. #[inline] fn remove_expired_ao( deq_name: &str, @@ -535,6 +661,7 @@ where } } + // TODO: Update the weighted_size. #[inline] fn remove_expired_wo(&mut self, batch_size: usize, now: Instant) { let time_to_live = &self.time_to_live; @@ -580,6 +707,48 @@ where } } +#[derive(Default)] +struct EntrySizeAndFrequency { + weight: u64, + freq: u32, +} + +impl EntrySizeAndFrequency { + fn new(policy_weight: u64) -> Self { + Self { + weight: policy_weight, + ..Default::default() + } + } + + fn add_policy_weight(&mut self, key: &K, value: &V, weigher: &mut Option>) { + self.weight += weigh(weigher, key, value); + } + + fn add_frequency(&mut self, freq: &FrequencySketch, hash: u64) { + self.freq += freq.frequency(hash) as u32; + } +} + +// Access-Order Queue Node +type AoqNode = NonNull>>; + +enum AdmissionResult { + Admitted { + victim_nodes: SmallVec<[AoqNode; 8]>, + victims_weight: u64, + }, + Rejected, +} + +// +// private free-standing functions +// +#[inline] +fn weigh(weigher: &mut Option>, key: &K, value: &V) -> u64 { + weigher.as_mut().map(|w| w(key, value)).unwrap_or(1) +} + // To see the debug prints, run test as `cargo test -- --nocapture` #[cfg(test)] mod tests { @@ -626,6 +795,55 @@ mod tests { assert_eq!(cache.get(&"b"), None); } + #[test] + fn size_aware_eviction() { + let weigher = |_k: &&str, v: &(&str, u64)| v.1; + + let alice = ("alice", 10u64); + let bob = ("bob", 15); + let cindy = ("cindy", 5); + let david = ("david", 15); + let dennis = ("dennis", 15); + + let mut cache = Cache::builder().max_capacity(31).weigher(weigher).build(); + + cache.insert("a", alice); + cache.insert("b", bob); + assert_eq!(cache.get(&"a"), Some(&alice)); + assert_eq!(cache.get(&"b"), Some(&bob)); + // order (LRU -> MRU) and counts: a -> 1, b -> 1 + + cache.insert("c", cindy); + assert_eq!(cache.get(&"c"), Some(&cindy)); + // order and counts: a -> 1, b -> 1, c -> 1 + + assert_eq!(cache.get(&"a"), Some(&alice)); + assert_eq!(cache.get(&"b"), Some(&bob)); + // order and counts: c -> 1, a -> 2, b -> 2 + + // To enter "d" 
(weight: 15), it needs to evict "c" (w: 5) and "a" (w: 10). + // "d" must have higher count than 3, which is the aggregated count + // of "a" and "c". + cache.insert("d", david); // count: d -> 0 + assert_eq!(cache.get(&"d"), None); // d -> 1 + + cache.insert("d", david); + assert_eq!(cache.get(&"d"), None); // d -> 2 + + cache.insert("d", david); + assert_eq!(cache.get(&"d"), None); // d -> 3 + + cache.insert("d", david); + assert_eq!(cache.get(&"d"), None); // d -> 4 + + // Finally "d" should be admitted by evicting "c" and "a". + cache.insert("d", dennis); + assert_eq!(cache.get(&"a"), None); + assert_eq!(cache.get(&"b"), Some(&bob)); + assert_eq!(cache.get(&"c"), None); + assert_eq!(cache.get(&"d"), Some(&dennis)); + } + #[test] fn invalidate_all() { let mut cache = Cache::new(100); From 4c2ac944a17d291d6a71433aa08b28c7c1a613ab Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 12 Dec 2021 09:31:16 +0800 Subject: [PATCH 24/42] Temporary disable Clippy on Rust 1.58 beta on CI Disable Clippy until we figure out what to do on the following lint: https://rust-lang.github.io/rust-clippy/master/index.html#non_send_fields_in_send_ty Relates to #54 (cherry picked from commit 646beb04b00dcc5349b8a30f1feac89a4756ade0) --- src/future/cache.rs | 2 ++ src/sync/cache.rs | 2 ++ src/sync/deques.rs | 2 ++ src/sync/segment.rs | 2 ++ 4 files changed, 8 insertions(+) diff --git a/src/future/cache.rs b/src/future/cache.rs index 9669a284..a8fc5f49 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -279,6 +279,8 @@ pub struct Cache { value_initializer: Arc>, } +// TODO: https://github.com/moka-rs/moka/issues/54 +#[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for Cache where K: Send + Sync, diff --git a/src/sync/cache.rs b/src/sync/cache.rs index efdff901..bccb9dcb 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -231,6 +231,8 @@ pub struct Cache { value_initializer: Arc>, } +// TODO: https://github.com/moka-rs/moka/issues/54 +#[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for Cache where K: Send + Sync, diff --git a/src/sync/deques.rs b/src/sync/deques.rs index 50cd5acd..99e0bd13 100644 --- a/src/sync/deques.rs +++ b/src/sync/deques.rs @@ -11,6 +11,8 @@ pub(crate) struct Deques { } #[cfg(feature = "future")] +// TODO: https://github.com/moka-rs/moka/issues/54 +#[allow(clippy::non_send_fields_in_send_ty)] // Multi-threaded async runtimes require base_cache::Inner to be Send, but it will // not be without this `unsafe impl`. This is because DeqNodes have NonNull // pointers. diff --git a/src/sync/segment.rs b/src/sync/segment.rs index 1e50ba31..206a6c3a 100644 --- a/src/sync/segment.rs +++ b/src/sync/segment.rs @@ -24,6 +24,8 @@ pub struct SegmentedCache { inner: Arc>, } +// TODO: https://github.com/moka-rs/moka/issues/54 +#[allow(clippy::non_send_fields_in_send_ty)] unsafe impl Send for SegmentedCache where K: Send + Sync, From 4404e6fd1cca5d41aa59688b7b67c842425c7be3 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 18 Dec 2021 19:06:13 +0800 Subject: [PATCH 25/42] Size-aware cache management Fix after merging the master branch. Note that added crossbeam-utils crate to the dependency for `AtomicCell` (using for tracking weighted size). 
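[Editor's note] As an aside on the `AtomicCell` remark above, here is a minimal, self-contained sketch of the pattern, assuming the `crossbeam-utils = "0.8"` dependency added in the Cargo.toml hunk below. The `WeightedSizeCounter` type and its methods are invented for illustration only; the real counter is the `weighted_size` field of `sync::base_cache::Inner` shown in the diff.

```rust
// Illustrative sketch (not moka's actual type): tracking a total weighted
// size with crossbeam_utils::atomic::AtomicCell, which hides the memory
// ordering arguments that std::sync::atomic::AtomicU64 requires.
use crossbeam_utils::atomic::AtomicCell;

struct WeightedSizeCounter {
    total: AtomicCell<u64>,
}

impl WeightedSizeCounter {
    fn new() -> Self {
        Self {
            total: AtomicCell::new(0),
        }
    }

    // Saturating add, so an oversized weigher result cannot wrap the counter.
    // Note this load-then-store pair is not a single atomic read-modify-write;
    // it assumes a single writer, which appears to match how the sync path in
    // the diff below updates the counter.
    fn add(&self, weight: u32) {
        let current = self.total.load();
        self.total.store(current.saturating_add(weight as u64));
    }

    fn sub(&self, weight: u32) {
        let current = self.total.load();
        self.total.store(current.saturating_sub(weight as u64));
    }
}

fn main() {
    let counter = WeightedSizeCounter::new();
    counter.add(10);
    counter.add(15);
    counter.sub(10);
    assert_eq!(counter.total.load(), 15);
}
```
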
--- .vscode/settings.json | 17 +++-------------- Cargo.toml | 1 + src/future/builder.rs | 5 ++--- src/sync/base_cache.rs | 22 +++++++++++----------- src/sync/builder.rs | 5 ++--- src/unsync/builder.rs | 5 ++--- src/unsync/cache.rs | 4 ++-- 7 files changed, 23 insertions(+), 36 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index f4809d6b..e05e0ee2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,20 +4,6 @@ "CARGO_TARGET_DIR": "target/ra" }, "cSpell.words": [ - "CLFU", - "Deque", - "Deques", - "Einziger", - "Eytan", - "Hasher", - "Kawano", - "MSRV", - "Moka", - "Ohad", - "RUSTFLAGS", - "Ristretto", - "Tatsuya", - "Upsert", "aarch", "actix", "ahash", @@ -29,7 +15,9 @@ "deqs", "Deque", "Deques", + "Einziger", "else's", + "Eytan", "getrandom", "Hasher", "Kawano", @@ -38,6 +26,7 @@ "MSRV", "nanos", "nocapture", + "Ohad", "peekable", "preds", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index d680d37a..006f25ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ atomic64 = [] [dependencies] crossbeam-channel = "0.5" +crossbeam-utils = "0.8" moka-cht = "0.4.2" num_cpus = "1.13" once_cell = "1.7" diff --git a/src/future/builder.rs b/src/future/builder.rs index 23f60dcd..28b04b44 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -203,7 +203,6 @@ impl CacheBuilder { mod tests { use super::CacheBuilder; - use super::Cache; use std::time::Duration; #[tokio::test] @@ -237,7 +236,7 @@ mod tests { #[should_panic(expected = "time_to_live is longer than 1000 years")] async fn build_cache_too_long_ttl() { let thousand_years_secs: u64 = 1000 * 365 * 24 * 3600; - let builder: CacheBuilder> = CacheBuilder::new(100); + let builder: CacheBuilder = CacheBuilder::new(100); let duration = Duration::from_secs(thousand_years_secs); builder .time_to_live(duration + Duration::from_secs(1)) @@ -248,7 +247,7 @@ mod tests { #[should_panic(expected = "time_to_idle is longer than 1000 years")] async fn build_cache_too_long_tti() { let thousand_years_secs: u64 = 1000 * 365 * 24 * 3600; - let builder: CacheBuilder> = CacheBuilder::new(100); + let builder: CacheBuilder = CacheBuilder::new(100); let duration = Duration::from_secs(thousand_years_secs); builder .time_to_idle(duration + Duration::from_secs(1)) diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 6d004995..b8ab1089 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -14,8 +14,8 @@ use crate::{ }, PredicateError, }; - use crossbeam_channel::{Receiver, Sender, TrySendError}; +use crossbeam_utils::atomic::AtomicCell; use parking_lot::{Mutex, RwLock}; use smallvec::SmallVec; use std::{ @@ -400,8 +400,8 @@ impl EntrySizeAndFrequency { } struct RawTimestamps { - last_accessed: Arc, - last_modified: Arc, + last_accessed: Arc, + last_modified: Arc, } impl RawTimestamps { @@ -432,7 +432,7 @@ type CacheEntry = (Arc, Arc>); pub(crate) struct Inner { max_capacity: Option, - weighted_size: AtomicU64, + weighted_size: AtomicCell, cache: CacheStore, build_hasher: S, deques: Mutex>, @@ -481,14 +481,14 @@ where // Ensure skt_capacity fits in a range of `128u32..=u32::MAX`. let skt_capacity = max_capacity - .map(|n| n.try_into().unwrap_or_default()) // Convert to u32. - .unwrap_or(u32::MAX) + .map(|n| n.try_into().unwrap_or(u32::MAX)) // Convert to u32. 
+ .unwrap_or_default() .max(128); let frequency_sketch = FrequencySketch::with_capacity(skt_capacity); Self { max_capacity: max_capacity.map(|n| n as u64), - weighted_size: AtomicU64::default(), + weighted_size: AtomicCell::default(), cache, build_hasher, deques: Mutex::new(Deques::default()), @@ -657,7 +657,7 @@ where let mut calls = 0; let mut should_sync = true; - let current_ws = self.weighted_size.load(Ordering::Acquire); + let current_ws = self.weighted_size.load(); let mut ws = WeightedSize { size: current_ws, weigher: self.weigher.as_ref(), @@ -695,8 +695,8 @@ where } } - debug_assert_eq!(self.weighted_size.load(Ordering::Acquire), current_ws); - self.weighted_size.store(ws.size, Ordering::Release); + debug_assert_eq!(self.weighted_size.load(), current_ws); + self.weighted_size.store(ws.size); if should_sync { Some(SyncPace::Fast) @@ -1291,7 +1291,7 @@ mod tests { let ensure_sketch_len = |max_capacity, len, name| { let cache = BaseCache::::new( - max_capacity, + Some(max_capacity), None, RandomState::default(), None, diff --git a/src/sync/builder.rs b/src/sync/builder.rs index 00edbc8a..066bb34a 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -265,7 +265,6 @@ impl CacheBuilder { #[cfg(test)] mod tests { - use super::Cache; use super::CacheBuilder; use std::time::Duration; @@ -329,7 +328,7 @@ mod tests { #[should_panic(expected = "time_to_live is longer than 1000 years")] async fn build_cache_too_long_ttl() { let thousand_years_secs: u64 = 1000 * 365 * 24 * 3600; - let builder: CacheBuilder> = CacheBuilder::new(100); + let builder: CacheBuilder = CacheBuilder::new(100); let duration = Duration::from_secs(thousand_years_secs); builder .time_to_live(duration + Duration::from_secs(1)) @@ -340,7 +339,7 @@ mod tests { #[should_panic(expected = "time_to_idle is longer than 1000 years")] async fn build_cache_too_long_tti() { let thousand_years_secs: u64 = 1000 * 365 * 24 * 3600; - let builder: CacheBuilder> = CacheBuilder::new(100); + let builder: CacheBuilder = CacheBuilder::new(100); let duration = Duration::from_secs(thousand_years_secs); builder .time_to_idle(duration + Duration::from_secs(1)) diff --git a/src/unsync/builder.rs b/src/unsync/builder.rs index 034d3c38..ab05d9a0 100644 --- a/src/unsync/builder.rs +++ b/src/unsync/builder.rs @@ -170,7 +170,6 @@ impl CacheBuilder { #[cfg(test)] mod tests { - use super::Cache; use super::CacheBuilder; use std::time::Duration; @@ -204,7 +203,7 @@ mod tests { #[should_panic(expected = "time_to_live is longer than 1000 years")] async fn build_cache_too_long_ttl() { let thousand_years_secs: u64 = 1000 * 365 * 24 * 3600; - let builder: CacheBuilder> = CacheBuilder::new(100); + let builder: CacheBuilder = CacheBuilder::new(100); let duration = Duration::from_secs(thousand_years_secs); builder .time_to_live(duration + Duration::from_secs(1)) @@ -215,7 +214,7 @@ mod tests { #[should_panic(expected = "time_to_idle is longer than 1000 years")] async fn build_cache_too_long_tti() { let thousand_years_secs: u64 = 1000 * 365 * 24 * 3600; - let builder: CacheBuilder> = CacheBuilder::new(100); + let builder: CacheBuilder = CacheBuilder::new(100); let duration = Duration::from_secs(thousand_years_secs); builder .time_to_idle(duration + Duration::from_secs(1)) diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index cae88ff2..2a2d48a4 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -165,8 +165,8 @@ where // Ensure skt_capacity fits in a range of `128u32..=u32::MAX`. 
let skt_capacity = max_capacity - .map(|n| n.try_into().unwrap_or_default()) // Convert to u32. - .unwrap_or(u32::MAX) + .map(|n| n.try_into().unwrap_or(u32::MAX)) // Convert to u32. + .unwrap_or_default() .max(128); let frequency_sketch = FrequencySketch::with_capacity(skt_capacity); Self { From af92346ee2b82dd85d3f811f62198ebdc3df933a Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 25 Dec 2021 19:09:18 +0800 Subject: [PATCH 26/42] Size-aware cache management - Change `weigher`'s return type from `u64` to `u32`. - Refactor data types `ValueEntry`, `KeyDate` and `KeyHashDate` to reduce the number of `Arc` pointers used by adding `EntryInfo` struct. --- README.md | 13 ++-- src/common.rs | 9 --- src/future/builder.rs | 2 +- src/future/cache.rs | 10 +-- src/sync.rs | 169 ++++++++++++++++++++++++++++------------ src/sync/base_cache.rs | 64 ++++++--------- src/sync/builder.rs | 2 +- src/sync/cache.rs | 10 +-- src/sync/invalidator.rs | 3 +- src/unsync.rs | 11 ++- src/unsync/builder.rs | 2 +- src/unsync/cache.rs | 35 ++++----- 12 files changed, 189 insertions(+), 141 deletions(-) diff --git a/README.md b/README.md index dddbc445..7265ff15 100644 --- a/README.md +++ b/README.md @@ -275,7 +275,7 @@ cache.get(&key); ## Example: Bounding a Cache with Weighted Size of Entry A `weigher` closure can be set at the cache creation time. It will calculate and -return a weighted size (relative size) of an entry. When it is set, a cache tiers to +return a weighted size (relative size) of an entry. When it is set, a cache tries to evict entries when the total weighted size exceeds its `max_capacity`. ```rust @@ -284,17 +284,18 @@ use moka::sync::Cache; fn main() { // Evict based on the byte length of strings in the cache. let cache = Cache::builder() - // Up to 32MiB instead of 3M entries because this cache is going to have - // a weigher. + // A weigher closure takes &K and &V and returns a u32 representing the + // relative size of the entry. Here, we use the byte length of the value + // String as the size. + .weigher(|_key, value: &String| -> u32 { value.len() as u32 }) + // This cache will hold up to 32MiB of values. .max_capacity(32 * 1024 * 1024) - // A weigher closure takes &K and &V and returns a u64 representing the - // relative size of the entry. - .weigher(|_key, value: &String| -> u64 { value.len() as u64 }) .build(); cache.insert(0, "zero".to_string()); } ``` + ## Example: Expiration Policies Moka supports the following expiration policies: diff --git a/src/common.rs b/src/common.rs index ea5d4add..4a67c013 100644 --- a/src/common.rs +++ b/src/common.rs @@ -14,12 +14,3 @@ pub(crate) mod unsafe_weak_pointer; pub(crate) mod atomic_time; pub(crate) mod time; - -use time::Instant; - -pub(crate) trait AccessTime { - fn last_accessed(&self) -> Option; - fn set_last_accessed(&mut self, timestamp: Instant); - fn last_modified(&self) -> Option; - fn set_last_modified(&mut self, timestamp: Instant); -} diff --git a/src/future/builder.rs b/src/future/builder.rs index 01aaf9e0..56ac5529 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -153,7 +153,7 @@ impl CacheBuilder { } /// Sets the weigher closure of the cache. 
- pub fn weigher(self, weigher: impl Fn(&K, &V) -> u64 + Send + Sync + 'static) -> Self { + pub fn weigher(self, weigher: impl Fn(&K, &V) -> u32 + Send + Sync + 'static) -> Self { Self { weigher: Some(Box::new(weigher)), ..self diff --git a/src/future/cache.rs b/src/future/cache.rs index 5e54eb99..5b368381 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -163,9 +163,9 @@ use std::{ /// // Up to 32MiB instead of 3M entries because this cache is going to have /// // a weigher. /// .max_capacity(32 * 1024 * 1024) -/// // A weigher closure takes &K and &V and returns a u64 representing the +/// // A weigher closure takes &K and &V and returns a u32 representing the /// // relative size of the entry. -/// .weigher(|_key, value: &String| -> u64 { value.len() as u64 }) +/// .weigher(|_key, value: &String| -> u32 { value.len() as u32 }) /// .build(); /// cache.insert(2, "two".to_string()).await; /// } @@ -177,7 +177,7 @@ use std::{ /// /// At the cache creation time, a weigher closure can be set by the `weigher` method /// of the `CacheBuilder`. A weigher closure takes `&K` and `&V` as the arguments and -/// returns a `u64` representing the relative size of the entry: +/// returns a `u32` representing the relative size of the entry: /// /// - If the `weigher` is _not_ set, the cache will treat each entry has the same /// size of `1`. This means the cache will be bounded by the number of entries. @@ -956,9 +956,9 @@ mod tests { #[tokio::test] async fn size_aware_eviction() { - let weigher = |_k: &&str, v: &(&str, u64)| v.1; + let weigher = |_k: &&str, v: &(&str, u32)| v.1; - let alice = ("alice", 10u64); + let alice = ("alice", 10); let bob = ("bob", 15); let cindy = ("cindy", 5); let david = ("david", 15); diff --git a/src/sync.rs b/src/sync.rs index ad4774c5..290b11c4 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -1,12 +1,12 @@ //! Provides thread-safe, blocking cache implementations. 
-use crate::common::{atomic_time::AtomicInstant, deque::DeqNode, time::Instant, AccessTime}; +use crate::common::{atomic_time::AtomicInstant, deque::DeqNode, time::Instant}; use parking_lot::Mutex; use std::{ ptr::NonNull, sync::{ - atomic::{AtomicBool, Ordering}, + atomic::{AtomicBool, AtomicU32, Ordering}, Arc, }, }; @@ -40,7 +40,70 @@ pub trait ConcurrentCacheExt { fn sync(&self); } -pub(crate) type Weigher = Box u64 + Send + Sync + 'static>; +pub(crate) type Weigher = Box u32 + Send + Sync + 'static>; + +pub(crate) trait AccessTime { + fn last_accessed(&self) -> Option; + fn set_last_accessed(&self, timestamp: Instant); + fn last_modified(&self) -> Option; + fn set_last_modified(&self, timestamp: Instant); +} + +pub(crate) trait EntryInfo: AccessTime { + fn is_admitted(&self) -> bool; + fn set_is_admitted(&self, value: bool); + fn reset_timestamps(&self); +} + +#[derive(Default)] +pub(crate) struct EntryInfoFull { + is_admitted: AtomicBool, + last_accessed: AtomicInstant, + last_modified: AtomicInstant, + _weighed_size: AtomicU32, +} + +impl EntryInfo for EntryInfoFull { + #[inline] + fn is_admitted(&self) -> bool { + self.is_admitted.load(Ordering::Acquire) + } + + #[inline] + fn set_is_admitted(&self, value: bool) { + self.is_admitted.store(value, Ordering::Release); + } + + #[inline] + fn reset_timestamps(&self) { + self.last_accessed.reset(); + self.last_modified.reset(); + } +} + +impl AccessTime for EntryInfoFull { + #[inline] + fn last_accessed(&self) -> Option { + self.last_accessed.instant() + } + + #[inline] + fn set_last_accessed(&self, timestamp: Instant) { + self.last_accessed.set_instant(timestamp); + } + + #[inline] + fn last_modified(&self) -> Option { + self.last_modified.instant() + } + + #[inline] + fn set_last_modified(&self, timestamp: Instant) { + self.last_modified.set_instant(timestamp); + } +} + +pub(crate) type ArcedEntryInfo = Arc; pub(crate) struct KeyHash { pub(crate) key: Arc, @@ -63,34 +126,50 @@ impl Clone for KeyHash { } pub(crate) struct KeyDate { - pub(crate) key: Arc, - pub(crate) timestamp: Arc, + key: Arc, + entry_info: ArcedEntryInfo, } impl KeyDate { - pub(crate) fn new(key: Arc, timestamp: Arc) -> Self { - Self { key, timestamp } + pub(crate) fn new(key: Arc, entry_info: ArcedEntryInfo) -> Self { + Self { key, entry_info } } - pub(crate) fn timestamp(&self) -> Option { - self.timestamp.instant() + pub(crate) fn key(&self) -> &Arc { + &self.key + } + + pub(crate) fn entry_info(&self) -> &ArcedEntryInfo { + &self.entry_info + } + + pub(crate) fn last_modified(&self) -> Option { + self.entry_info.last_modified() } } pub(crate) struct KeyHashDate { - pub(crate) key: Arc, - pub(crate) hash: u64, - pub(crate) timestamp: Arc, + key: Arc, + hash: u64, + entry_info: Arc, } impl KeyHashDate { - pub(crate) fn new(kh: KeyHash, timestamp: Arc) -> Self { + pub(crate) fn new(kh: KeyHash, entry_info: ArcedEntryInfo) -> Self { Self { key: kh.key, hash: kh.hash, - timestamp, + entry_info, } } + + pub(crate) fn key(&self) -> &Arc { + &self.key + } + + pub(crate) fn entry_info(&self) -> &ArcedEntryInfo { + &self.entry_info + } } pub(crate) struct KvEntry { @@ -120,9 +199,7 @@ unsafe impl Send for DeqNodes {} pub(crate) struct ValueEntry { pub(crate) value: V, - is_admitted: Arc, - last_accessed: Arc, - last_modified: Arc, + info: ArcedEntryInfo, nodes: Mutex>, } @@ -130,9 +207,7 @@ impl ValueEntry { pub(crate) fn new(value: V) -> Self { Self { value, - is_admitted: Arc::new(AtomicBool::new(false)), - last_accessed: Default::default(), - last_modified: 
Default::default(), + info: Arc::new(EntryInfoFull::default()), nodes: Mutex::new(DeqNodes { access_order_q_node: None, write_order_q_node: None, @@ -148,36 +223,28 @@ impl ValueEntry { write_order_q_node: other_nodes.write_order_q_node, } }; - let last_accessed = Arc::clone(&other.last_accessed); - let last_modified = Arc::clone(&other.last_modified); + let info = Arc::clone(&other.info); // To prevent this updated ValueEntry from being evicted by an expiration policy, // set the max value to the timestamps. They will be replaced with the real // timestamps when applying writes. - last_accessed.reset(); - last_modified.reset(); + info.reset_timestamps(); Self { value, - is_admitted: Arc::clone(&other.is_admitted), - last_accessed, - last_modified, + info, nodes: Mutex::new(nodes), } } - pub(crate) fn is_admitted(&self) -> bool { - self.is_admitted.load(Ordering::Acquire) + pub(crate) fn entry_info(&self) -> ArcedEntryInfo { + Arc::clone(&self.info) } - pub(crate) fn set_is_admitted(&self, value: bool) { - self.is_admitted.store(value, Ordering::Release); - } - - pub(crate) fn raw_last_accessed(&self) -> Arc { - Arc::clone(&self.last_accessed) + pub(crate) fn is_admitted(&self) -> bool { + self.info.is_admitted() } - pub(crate) fn raw_last_modified(&self) -> Arc { - Arc::clone(&self.last_modified) + pub(crate) fn set_is_admitted(&self, value: bool) { + self.info.set_is_admitted(value); } pub(crate) fn access_order_q_node(&self) -> Option> { @@ -214,22 +281,22 @@ impl ValueEntry { impl AccessTime for Arc> { #[inline] fn last_accessed(&self) -> Option { - self.last_accessed.instant() + self.info.last_accessed() } #[inline] - fn set_last_accessed(&mut self, timestamp: Instant) { - self.last_accessed.set_instant(timestamp); + fn set_last_accessed(&self, timestamp: Instant) { + self.info.set_last_accessed(timestamp); } #[inline] fn last_modified(&self) -> Option { - self.last_modified.instant() + self.info.last_modified() } #[inline] - fn set_last_modified(&mut self, timestamp: Instant) { - self.last_modified.set_instant(timestamp); + fn set_last_modified(&self, timestamp: Instant) { + self.info.set_last_modified(timestamp); } } @@ -240,30 +307,30 @@ impl AccessTime for DeqNode> { } #[inline] - fn set_last_accessed(&mut self, _timestamp: Instant) { + fn set_last_accessed(&self, _timestamp: Instant) { unreachable!(); } #[inline] fn last_modified(&self) -> Option { - self.element.timestamp.instant() + self.element.entry_info.last_modified() } #[inline] - fn set_last_modified(&mut self, timestamp: Instant) { - self.element.timestamp.set_instant(timestamp); + fn set_last_modified(&self, timestamp: Instant) { + self.element.entry_info.set_last_modified(timestamp); } } impl AccessTime for DeqNode> { #[inline] fn last_accessed(&self) -> Option { - self.element.timestamp.instant() + self.element.entry_info.last_accessed() } #[inline] - fn set_last_accessed(&mut self, timestamp: Instant) { - self.element.timestamp.set_instant(timestamp); + fn set_last_accessed(&self, timestamp: Instant) { + self.element.entry_info.set_last_accessed(timestamp); } #[inline] @@ -272,7 +339,7 @@ impl AccessTime for DeqNode> { } #[inline] - fn set_last_modified(&mut self, _timestamp: Instant) { + fn set_last_modified(&self, _timestamp: Instant) { unreachable!(); } } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index b8ab1089..f3a1c3b4 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -2,7 +2,8 @@ use super::{ deques::Deques, housekeeper::{Housekeeper, InnerSync, SyncPace}, 
invalidator::{GetOrRemoveEntry, InvalidationResult, Invalidator, KeyDateLite, PredicateFun}, - KeyDate, KeyHash, KeyHashDate, KvEntry, PredicateId, ReadOp, ValueEntry, Weigher, WriteOp, + AccessTime, KeyDate, KeyHash, KeyHashDate, KvEntry, PredicateId, ReadOp, ValueEntry, Weigher, + WriteOp, }; use crate::{ common::{ @@ -10,7 +11,6 @@ use crate::{ deque::{CacheRegion, DeqNode, Deque}, frequency_sketch::FrequencySketch, time::{CheckedTimeOps, Clock, Instant}, - AccessTime, }, PredicateError, }; @@ -358,21 +358,21 @@ struct WeightedSize<'a, K, V> { impl<'a, K, V> WeightedSize<'a, K, V> { #[inline] - fn weigh(&self, key: &K, value: &V) -> u64 { + fn weigh(&self, key: &K, value: &V) -> u32 { self.weigher.map(|w| w(key, value)).unwrap_or(1) } #[inline] - fn saturating_add(&mut self, weight: u64) { + fn saturating_add(&mut self, weight: u32) { let total = &mut self.size; - *total = total.saturating_add(weight); + *total = total.saturating_add(weight as u64); } #[inline] fn saturating_sub(&mut self, key: &K, value: &V) { let weight = self.weigh(key, value); let total = &mut self.size; - *total = total.saturating_sub(weight); + *total = total.saturating_sub(weight as u64); } } @@ -383,15 +383,15 @@ struct EntrySizeAndFrequency { } impl EntrySizeAndFrequency { - fn new(policy_weight: u64) -> Self { + fn new(policy_weight: u32) -> Self { Self { - weight: policy_weight, + weight: policy_weight as u64, ..Default::default() } } fn add_policy_weight(&mut self, ws: &WeightedSize<'_, K, V>, key: &K, value: &V) { - self.weight += ws.weigh(key, value); + self.weight += ws.weigh(key, value) as u64; } fn add_frequency(&mut self, freq: &FrequencySketch, hash: u64) { @@ -399,20 +399,6 @@ impl EntrySizeAndFrequency { } } -struct RawTimestamps { - last_accessed: Arc, - last_modified: Arc, -} - -impl RawTimestamps { - fn new(entry: &Arc>) -> Self { - Self { - last_accessed: entry.raw_last_accessed(), - last_modified: entry.raw_last_modified(), - } - } -} - // Access-Order Queue Node type AoqNode = NonNull>>; @@ -718,9 +704,9 @@ where V: Send + Sync + 'static, S: BuildHasher + Clone + Send + Sync + 'static, { - fn has_enough_capacity(&self, candidate_weight: u64, ws: &WeightedSize<'_, K, V>) -> bool { + fn has_enough_capacity(&self, candidate_weight: u32, ws: &WeightedSize<'_, K, V>) -> bool { self.max_capacity - .map(|limit| ws.size + candidate_weight <= limit) + .map(|limit| ws.size + candidate_weight as u64 <= limit) .unwrap_or(true) } @@ -730,7 +716,7 @@ where let ch = &self.read_op_ch; for _ in 0..count { match ch.try_recv() { - Ok(Hit(hash, mut entry, timestamp)) => { + Ok(Hit(hash, entry, timestamp)) => { freq.increment(hash); entry.set_last_accessed(timestamp); deqs.move_to_back_ao(&entry) @@ -759,7 +745,7 @@ where fn handle_upsert( &self, kh: KeyHash, - mut entry: Arc>, + entry: Arc>, timestamp: Instant, deqs: &mut Deques, freq: &FrequencySketch, @@ -767,7 +753,6 @@ where ) { entry.set_last_accessed(timestamp); entry.set_last_modified(timestamp); - let raw_ts = RawTimestamps::new(&entry); let policy_weight = ws.weigh(&kh.key, &entry.value); @@ -783,12 +768,12 @@ where if self.has_enough_capacity(policy_weight, ws) { // There are enough room in the cache (or the cache is unbounded). // Add the candidate to the deques. - self.handle_admit(kh, &entry, policy_weight, raw_ts, deqs, ws); + self.handle_admit(kh, &entry, policy_weight, deqs, ws); return; } if let Some(max) = self.max_capacity { - if policy_weight > max { + if policy_weight as u64 > max { // The candidate is too big to fit in the cache. Reject it. 
self.cache.remove(&Arc::clone(&kh.key)); return; @@ -825,7 +810,7 @@ where skipped_nodes = skipped; // Add the candidate to the deques. - self.handle_admit(kh, &entry, policy_weight, raw_ts, deqs, ws); + self.handle_admit(kh, &entry, policy_weight, deqs, ws); } AdmissionResult::Rejected { skipped_nodes: s } => { skipped_nodes = s; @@ -924,8 +909,7 @@ where &self, kh: KeyHash, entry: &Arc>, - policy_weight: u64, - raw_ts: RawTimestamps, + policy_weight: u32, deqs: &mut Deques, ws: &mut WeightedSize<'_, K, V>, ) { @@ -933,11 +917,11 @@ where ws.saturating_add(policy_weight); deqs.push_back_ao( CacheRegion::MainProbation, - KeyHashDate::new(kh, raw_ts.last_accessed), + KeyHashDate::new(kh, entry.entry_info()), entry, ); if self.is_write_order_queue_enabled() { - deqs.push_back_wo(KeyDate::new(key, raw_ts.last_modified), entry); + deqs.push_back_wo(KeyDate::new(key, entry.entry_info()), entry); } entry.set_is_admitted(true); } @@ -1017,8 +1001,8 @@ where .and_then(|node| { if is_expired_entry_ao(tti, va, &*node, now) { Some(( - Some(Arc::clone(&node.element.key)), - Some(&node.element.timestamp), + Some(Arc::clone(node.element.key())), + Some(Arc::clone(node.element.entry_info())), )) } else { None @@ -1082,8 +1066,8 @@ where .and_then(|node| { if is_expired_entry_wo(ttl, va, &*node, now) { Some(( - Some(Arc::clone(&node.element.key)), - Some(&node.element.timestamp), + Some(Arc::clone(node.element.key())), + Some(Arc::clone(node.element.entry_info())), )) } else { None @@ -1177,7 +1161,7 @@ where while len < batch_size { if let Some(kd) = iter.next() { - if let Some(ts) = kd.timestamp() { + if let Some(ts) = kd.last_modified() { candidates.push(KeyDateLite::new(&kd.key, ts)); len += 1; } diff --git a/src/sync/builder.rs b/src/sync/builder.rs index 87dc7997..2f42fb1f 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -232,7 +232,7 @@ impl CacheBuilder { } /// Sets the weigher closure of the cache. - pub fn weigher(self, weigher: impl Fn(&K, &V) -> u64 + Send + Sync + 'static) -> Self { + pub fn weigher(self, weigher: impl Fn(&K, &V) -> u32 + Send + Sync + 'static) -> Self { Self { weigher: Some(Box::new(weigher)), ..self diff --git a/src/sync/cache.rs b/src/sync/cache.rs index 6356d83f..b2eaf705 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -126,9 +126,9 @@ use std::{ /// // Up to 32MiB instead of 3M entries because this cache is going to have /// // a weigher. /// .max_capacity(32 * 1024 * 1024) -/// // A weigher closure takes &K and &V and returns a u64 representing the +/// // A weigher closure takes &K and &V and returns a u32 representing the /// // relative size of the entry. -/// .weigher(|_key, value: &String| -> u64 { value.len() as u64 }) +/// .weigher(|_key, value: &String| -> u32 { value.len() as u32 }) /// .build(); /// cache.insert(2, "two".to_string()); /// ``` @@ -139,7 +139,7 @@ use std::{ /// /// At the cache creation time, a weigher closure can be set by the `weigher` method /// of the `CacheBuilder`. A weigher closure takes `&K` and `&V` as the arguments and -/// returns a `u64` representing the relative size of the entry: +/// returns a `u32` representing the relative size of the entry: /// /// - If the `weigher` is _not_ set, the cache will treat each entry has the same /// size of `1`. This means the cache will be bounded by the number of entries. 
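[Editor's note] Since the doc-comment hunk above spells out the default weight of `1`, a self-contained sketch of how the per-entry weight is resolved may help. The `effective_weight` name is invented here; it mirrors the private `weigh` helpers added elsewhere in this series.

```rust
// Illustrative only: a configured weigher's u32 result is used as the entry's
// weight; without a weigher, every entry weighs 1 and `max_capacity` simply
// bounds the number of entries.
use std::convert::TryInto;

type Weigher<K, V> = Box<dyn Fn(&K, &V) -> u32 + Send + Sync + 'static>;

fn effective_weight<K, V>(weigher: &Option<Weigher<K, V>>, key: &K, value: &V) -> u32 {
    weigher.as_ref().map(|w| w(key, value)).unwrap_or(1)
}

fn main() {
    // No weigher: every entry weighs 1.
    let none: Option<Weigher<u32, String>> = None;
    assert_eq!(effective_weight(&none, &1, &"hello".to_string()), 1);

    // With a weigher: `max_capacity` bounds the sum of the returned weights
    // (here, the byte length of the value string).
    let by_len: Option<Weigher<u32, String>> = Some(Box::new(|_k: &u32, v: &String| -> u32 {
        v.len().try_into().unwrap_or(u32::MAX)
    }));
    assert_eq!(effective_weight(&by_len, &1, &"hello".to_string()), 5);
}
```
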
@@ -796,9 +796,9 @@ mod tests { #[test] fn size_aware_eviction() { - let weigher = |_k: &&str, v: &(&str, u64)| v.1; + let weigher = |_k: &&str, v: &(&str, u32)| v.1; - let alice = ("alice", 10u64); + let alice = ("alice", 10); let bob = ("bob", 15); let cindy = ("cindy", 5); let david = ("david", 15); diff --git a/src/sync/invalidator.rs b/src/sync/invalidator.rs index 91e8bb42..861a1c3e 100644 --- a/src/sync/invalidator.rs +++ b/src/sync/invalidator.rs @@ -5,12 +5,11 @@ use crate::{ thread_pool::{PoolName, ThreadPool, ThreadPoolRegistry}, time::Instant, unsafe_weak_pointer::UnsafeWeakPointer, - AccessTime, }, PredicateError, }; -use super::{base_cache::Inner, KvEntry, PredicateId, PredicateIdStr, ValueEntry}; +use super::{base_cache::Inner, AccessTime, KvEntry, PredicateId, PredicateIdStr, ValueEntry}; use parking_lot::{Mutex, RwLock}; use std::{ diff --git a/src/unsync.rs b/src/unsync.rs index 9dce7dec..82440131 100644 --- a/src/unsync.rs +++ b/src/unsync.rs @@ -9,9 +9,16 @@ use std::{ptr::NonNull, rc::Rc}; pub use builder::CacheBuilder; pub use cache::Cache; -use crate::common::{deque::DeqNode, time::Instant, AccessTime}; +use crate::common::{deque::DeqNode, time::Instant}; -pub(crate) type Weigher = Box u64>; +pub(crate) type Weigher = Box u32>; + +pub(crate) trait AccessTime { + fn last_accessed(&self) -> Option; + fn set_last_accessed(&mut self, timestamp: Instant); + fn last_modified(&self) -> Option; + fn set_last_modified(&mut self, timestamp: Instant); +} pub(crate) struct KeyDate { pub(crate) key: Rc, diff --git a/src/unsync/builder.rs b/src/unsync/builder.rs index 8ce9008e..c5cff345 100644 --- a/src/unsync/builder.rs +++ b/src/unsync/builder.rs @@ -137,7 +137,7 @@ impl CacheBuilder { } /// Sets the weigher closure of the cache. - pub fn weigher(self, weigher: impl FnMut(&K, &V) -> u64 + 'static) -> Self { + pub fn weigher(self, weigher: impl FnMut(&K, &V) -> u32 + 'static) -> Self { Self { weigher: Some(Box::new(weigher)), ..self diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index 2a2d48a4..3e12caab 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -1,9 +1,8 @@ -use super::{deques::Deques, CacheBuilder, KeyDate, KeyHashDate, ValueEntry, Weigher}; +use super::{deques::Deques, AccessTime, CacheBuilder, KeyDate, KeyHashDate, ValueEntry, Weigher}; use crate::common::{ deque::{CacheRegion, DeqNode, Deque}, frequency_sketch::FrequencySketch, time::{CheckedTimeOps, Clock, Instant}, - AccessTime, }; use smallvec::SmallVec; @@ -229,7 +228,7 @@ where let entry = ValueEntry::new(value); if let Some(old_entry) = self.cache.insert(Rc::clone(&key), entry) { - let old_policy_weight = weigh(&mut self.weigher, &key, &old_entry.value); + let old_policy_weight = weigh(&mut self.weigher, &key, &old_entry.value) as u64; self.handle_update(key, timestamp, policy_weight, old_entry, old_policy_weight); } else { let hash = self.hash(&key); @@ -402,20 +401,20 @@ where deques.move_to_back_ao(entry) } - fn has_enough_capacity(&self, candidate_weight: u64, ws: u64) -> bool { + fn has_enough_capacity(&self, candidate_weight: u32, ws: u64) -> bool { self.max_capacity - .map(|limit| ws + candidate_weight <= limit) + .map(|limit| ws + candidate_weight as u64 <= limit) .unwrap_or(true) } fn saturating_add_to_total_weight(&mut self, weight: u64) { let total = &mut self.weighted_size; - *total = total.saturating_add(weight); + *total = total.saturating_add(weight as u64); } fn saturating_sub_from_total_weight(&mut self, weight: u64) { let total = &mut self.weighted_size; - *total = 
total.saturating_sub(weight); + *total = total.saturating_sub(weight as u64); } #[inline] @@ -423,7 +422,7 @@ where &mut self, key: Rc, hash: u64, - policy_weight: u64, + policy_weight: u32, timestamp: Option, ) { let has_free_space = self.has_enough_capacity(policy_weight, self.weighted_size); @@ -441,19 +440,19 @@ where if self.time_to_live.is_some() { deqs.push_back_wo(KeyDate::new(key, timestamp), entry); } - self.saturating_add_to_total_weight(policy_weight); + self.saturating_add_to_total_weight(policy_weight as u64); return; } if let Some(max) = self.max_capacity { - if policy_weight > max { + if policy_weight as u64 > max { // The candidate is too big to fit in the cache. Reject it. cache.remove(&Rc::clone(&key)); return; } } - let mut candidate = EntrySizeAndFrequency::new(policy_weight); + let mut candidate = EntrySizeAndFrequency::new(policy_weight as u64); candidate.add_frequency(freq, hash); match Self::admit(&candidate, cache, deqs, freq, &mut self.weigher) { @@ -485,7 +484,7 @@ where } Self::saturating_sub_from_total_weight(self, victims_weight); - Self::saturating_add_to_total_weight(self, policy_weight); + Self::saturating_add_to_total_weight(self, policy_weight as u64); } AdmissionResult::Rejected => { // Remove the candidate from the cache. @@ -585,7 +584,7 @@ where &mut self, key: Rc, timestamp: Option, - policy_weight: u64, + policy_weight: u32, old_entry: ValueEntry, old_policy_weight: u64, ) { @@ -600,7 +599,7 @@ where deqs.move_to_back_wo(entry); self.saturating_sub_from_total_weight(old_policy_weight); - self.saturating_add_to_total_weight(policy_weight); + self.saturating_add_to_total_weight(policy_weight as u64); } fn evict(&mut self, now: Instant) { @@ -736,7 +735,7 @@ impl EntrySizeAndFrequency { } fn add_policy_weight(&mut self, key: &K, value: &V, weigher: &mut Option>) { - self.weight += weigh(weigher, key, value); + self.weight += weigh(weigher, key, value) as u64; } fn add_frequency(&mut self, freq: &FrequencySketch, hash: u64) { @@ -759,7 +758,7 @@ enum AdmissionResult { // private free-standing functions // #[inline] -fn weigh(weigher: &mut Option>, key: &K, value: &V) -> u64 { +fn weigh(weigher: &mut Option>, key: &K, value: &V) -> u32 { weigher.as_mut().map(|w| w(key, value)).unwrap_or(1) } @@ -810,9 +809,9 @@ mod tests { #[test] fn size_aware_eviction() { - let weigher = |_k: &&str, v: &(&str, u64)| v.1; + let weigher = |_k: &&str, v: &(&str, u32)| v.1; - let alice = ("alice", 10u64); + let alice = ("alice", 10); let bob = ("bob", 15); let cindy = ("cindy", 5); let david = ("david", 15); From 261e1ae64a725c92f7cad141400ed4c65cdb6443 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 25 Dec 2021 20:22:32 +0800 Subject: [PATCH 27/42] Size-aware cache management Update the `weigher` examples to avoid `u32` overflows. --- README.md | 5 ++++- src/future/cache.rs | 5 ++++- src/sync/cache.rs | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7265ff15..2662a8dc 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,7 @@ return a weighted size (relative size) of an entry. When it is set, a cache trie evict entries when the total weighted size exceeds its `max_capacity`. ```rust +use std::convert::TryInto; use moka::sync::Cache; fn main() { @@ -287,7 +288,9 @@ fn main() { // A weigher closure takes &K and &V and returns a u32 representing the // relative size of the entry. Here, we use the byte length of the value // String as the size. 
- .weigher(|_key, value: &String| -> u32 { value.len() as u32 }) + .weigher(|_key, value: &String| -> u32 { + value.len().try_into().unwrap_or(u32::MAX) + }) // This cache will hold up to 32MiB of values. .max_capacity(32 * 1024 * 1024) .build(); diff --git a/src/future/cache.rs b/src/future/cache.rs index 5b368381..97f45c17 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -146,6 +146,7 @@ use std::{ /// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } /// // futures = "0.3" /// +/// use std::convert::TryInto; /// use moka::future::Cache; /// /// #[tokio::main] @@ -165,7 +166,9 @@ use std::{ /// .max_capacity(32 * 1024 * 1024) /// // A weigher closure takes &K and &V and returns a u32 representing the /// // relative size of the entry. -/// .weigher(|_key, value: &String| -> u32 { value.len() as u32 }) +/// .weigher(|_key, value: &String| -> u32 { +/// value.len().try_into().unwrap_or(u32::MAX) +/// }) /// .build(); /// cache.insert(2, "two".to_string()).await; /// } diff --git a/src/sync/cache.rs b/src/sync/cache.rs index b2eaf705..e2d674c9 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -111,6 +111,7 @@ use std::{ /// ## Size-based /// /// ```rust +/// use std::convert::TryInto; /// use moka::sync::Cache; /// /// // Evict based on the number of entries in the cache. @@ -128,7 +129,9 @@ use std::{ /// .max_capacity(32 * 1024 * 1024) /// // A weigher closure takes &K and &V and returns a u32 representing the /// // relative size of the entry. -/// .weigher(|_key, value: &String| -> u32 { value.len() as u32 }) +/// .weigher(|_key, value: &String| -> u32 { +/// value.len().try_into().unwrap_or(u32::MAX) +/// }) /// .build(); /// cache.insert(2, "two".to_string()); /// ``` From 841da56e7b095c44ac18d8aa1acc7832166cb0f5 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sat, 25 Dec 2021 22:19:48 +0800 Subject: [PATCH 28/42] Size-aware cache management Implement weighted size handling in `sync::BaseCache` for when updating an existing cache entry. 
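[Editor's note] A compact sketch of the accounting this message describes (the `TotalWeight` type and `on_update` method are invented names, not the ones in `base_cache.rs`): on an update, the old entry's weight comes off the running total and the new entry's weight goes on, with saturating arithmetic at each step so the counter cannot wrap whether the new value is lighter or heavier than the old one. This mirrors the two-step subtract-then-add already used in `unsync::Cache::handle_update` earlier in the series.

```rust
// Illustrative sketch of update-time weight accounting.
#[derive(Default)]
struct TotalWeight(u64);

impl TotalWeight {
    fn saturating_add(&mut self, weight: u32) {
        self.0 = self.0.saturating_add(weight as u64);
    }

    fn saturating_sub(&mut self, weight: u32) {
        self.0 = self.0.saturating_sub(weight as u64);
    }

    // On an update, remove the old entry's weight and add the new one's.
    // Two saturating steps avoid underflow when the new value is lighter.
    fn on_update(&mut self, old_weight: u32, new_weight: u32) {
        self.saturating_sub(old_weight);
        self.saturating_add(new_weight);
    }
}

fn main() {
    let mut total = TotalWeight::default();
    total.saturating_add(10); // insert: weight 10
    total.on_update(10, 4);   // update the same key with a lighter value
    assert_eq!(total.0, 4);
}
```
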
--- src/sync.rs | 65 +++++++++++++++---- src/sync/base_cache.rs | 138 +++++++++++++++++++++-------------------- 2 files changed, 125 insertions(+), 78 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index 290b11c4..285d8fe3 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -53,14 +53,26 @@ pub(crate) trait EntryInfo: AccessTime { fn is_admitted(&self) -> bool; fn set_is_admitted(&self, value: bool); fn reset_timestamps(&self); + fn weighted_size(&self) -> u32; + fn set_weighted_size(&self, size: u32); } -#[derive(Default)] pub(crate) struct EntryInfoFull { is_admitted: AtomicBool, last_accessed: AtomicInstant, last_modified: AtomicInstant, - _weighed_size: AtomicU32, + weighted_size: AtomicU32, +} + +impl EntryInfoFull { + fn new(weighted_size: u32) -> Self { + Self { + is_admitted: Default::default(), + last_accessed: Default::default(), + last_modified: Default::default(), + weighted_size: AtomicU32::new(weighted_size), + } + } } impl EntryInfo for EntryInfoFull { @@ -79,6 +91,16 @@ impl EntryInfo for EntryInfoFull { self.last_accessed.reset(); self.last_modified.reset(); } + + #[inline] + fn weighted_size(&self) -> u32 { + self.weighted_size.load(Ordering::Acquire) + } + + #[inline] + fn set_weighted_size(&self, size: u32) { + self.weighted_size.store(size, Ordering::Release); + } } impl AccessTime for EntryInfoFull { @@ -131,8 +153,11 @@ pub(crate) struct KeyDate { } impl KeyDate { - pub(crate) fn new(key: Arc, entry_info: ArcedEntryInfo) -> Self { - Self { key, entry_info } + pub(crate) fn new(key: Arc, entry_info: &ArcedEntryInfo) -> Self { + Self { + key, + entry_info: Arc::clone(entry_info), + } } pub(crate) fn key(&self) -> &Arc { @@ -155,11 +180,11 @@ pub(crate) struct KeyHashDate { } impl KeyHashDate { - pub(crate) fn new(kh: KeyHash, entry_info: ArcedEntryInfo) -> Self { + pub(crate) fn new(kh: KeyHash, entry_info: &ArcedEntryInfo) -> Self { Self { key: kh.key, hash: kh.hash, - entry_info, + entry_info: Arc::clone(entry_info), } } @@ -204,10 +229,10 @@ pub(crate) struct ValueEntry { } impl ValueEntry { - pub(crate) fn new(value: V) -> Self { + pub(crate) fn new(value: V, weighted_size: u32) -> Self { Self { value, - info: Arc::new(EntryInfoFull::default()), + info: Arc::new(EntryInfoFull::new(weighted_size)), nodes: Mutex::new(DeqNodes { access_order_q_node: None, write_order_q_node: None, @@ -215,7 +240,7 @@ impl ValueEntry { } } - pub(crate) fn new_with(value: V, other: &Self) -> Self { + pub(crate) fn new_with(value: V, weighted_size: u32, other: &Self) -> Self { let nodes = { let other_nodes = other.nodes.lock(); DeqNodes { @@ -224,6 +249,7 @@ impl ValueEntry { } }; let info = Arc::clone(&other.info); + info.set_weighted_size(weighted_size); // To prevent this updated ValueEntry from being evicted by an expiration policy, // set the max value to the timestamps. They will be replaced with the real // timestamps when applying writes. 
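[Editor's note] A stripped-down sketch of the idea behind the shared entry info in the hunk above (all names here are invented; the real trait and `EntryInfoFull` struct are in the surrounding hunks): the `ValueEntry` and the deque-node elements hold clones of one `Arc`, so the weight recorded at insert or update time is visible from either side without relinking any deque node.

```rust
// Illustrative only: a weight stored behind a shared Arc is observed by every
// holder of that Arc, which is why an in-place update can change the weight
// without touching the access-order or write-order deques.
use std::sync::{
    atomic::{AtomicU32, Ordering},
    Arc,
};

#[derive(Default)]
struct SharedEntryInfo {
    weighted_size: AtomicU32,
}

impl SharedEntryInfo {
    fn weighted_size(&self) -> u32 {
        self.weighted_size.load(Ordering::Acquire)
    }

    fn set_weighted_size(&self, size: u32) {
        self.weighted_size.store(size, Ordering::Release);
    }
}

struct Entry<V> {
    value: V,
    info: Arc<SharedEntryInfo>,
}

fn main() {
    let info = Arc::new(SharedEntryInfo::default());
    info.set_weighted_size(10);
    let entry = Entry {
        value: "alice",
        info: Arc::clone(&info),
    };

    // A deque node holding a clone of the same Arc sees the update made when
    // the entry is replaced with a heavier value.
    let node_info = Arc::clone(&entry.info);
    entry.info.set_weighted_size(15);
    assert_eq!(node_info.weighted_size(), 15);
    assert_eq!(entry.value, "alice");
}
```
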
@@ -235,8 +261,8 @@ impl ValueEntry { } } - pub(crate) fn entry_info(&self) -> ArcedEntryInfo { - Arc::clone(&self.info) + pub(crate) fn entry_info(&self) -> &ArcedEntryInfo { + &self.info } pub(crate) fn is_admitted(&self) -> bool { @@ -247,6 +273,11 @@ impl ValueEntry { self.info.set_is_admitted(value); } + #[inline] + pub(crate) fn weighted_size(&self) -> u32 { + self.info.weighted_size() + } + pub(crate) fn access_order_q_node(&self) -> Option> { self.nodes.lock().access_order_q_node } @@ -351,6 +382,16 @@ pub(crate) enum ReadOp { } pub(crate) enum WriteOp { - Upsert(KeyHash, Arc>), + Insert { + key_hash: KeyHash, + value_entry: Arc>, + new_weighted_size: u32, + }, + Update { + key_hash: KeyHash, + value_entry: Arc>, + old_weighted_size: u32, + new_weighted_size: u32, + }, Remove(KvEntry), } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index f3a1c3b4..102e0962 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -236,6 +236,7 @@ where #[inline] pub(crate) fn do_insert_with_hash(&self, key: Arc, hash: u64, value: V) -> WriteOp { + let ws = self.inner.weigh(&key, &value); let op_cnt1 = Rc::new(AtomicU8::new(0)); let op_cnt2 = Rc::clone(&op_cnt1); let mut op1 = None; @@ -253,11 +254,15 @@ where Arc::clone(&key), // on_insert || { - let entry = Arc::new(ValueEntry::new(value.clone())); + let entry = Arc::new(ValueEntry::new(value.clone(), ws)); let cnt = op_cnt1.fetch_add(1, Ordering::Relaxed); op1 = Some(( cnt, - WriteOp::Upsert(KeyHash::new(Arc::clone(&key), hash), Arc::clone(&entry)), + WriteOp::Insert { + key_hash: KeyHash::new(Arc::clone(&key), hash), + value_entry: Arc::clone(&entry), + new_weighted_size: ws, + }, )); entry }, @@ -266,12 +271,18 @@ where // NOTE: `new_with` sets the max value to the last_accessed and last_modified // to prevent this updated ValueEntry from being evicted by an expiration policy. // See the comments in `new_with` for more details. 
- let entry = Arc::new(ValueEntry::new_with(value.clone(), old_entry)); + let old_weighted_size = old_entry.weighted_size(); + let entry = Arc::new(ValueEntry::new_with(value.clone(), ws, old_entry)); let cnt = op_cnt2.fetch_add(1, Ordering::Relaxed); op2 = Some(( cnt, Arc::clone(old_entry), - WriteOp::Upsert(KeyHash::new(Arc::clone(&key), hash), Arc::clone(&entry)), + WriteOp::Update { + key_hash: KeyHash::new(Arc::clone(&key), hash), + value_entry: Arc::clone(&entry), + old_weighted_size, + new_weighted_size: ws, + }, )); entry }, @@ -351,27 +362,18 @@ where } } -struct WeightedSize<'a, K, V> { - size: u64, - weigher: Option<&'a Weigher>, -} - -impl<'a, K, V> WeightedSize<'a, K, V> { - #[inline] - fn weigh(&self, key: &K, value: &V) -> u32 { - self.weigher.map(|w| w(key, value)).unwrap_or(1) - } +struct WeightedSize(u64); +impl WeightedSize { #[inline] fn saturating_add(&mut self, weight: u32) { - let total = &mut self.size; + let total = &mut self.0; *total = total.saturating_add(weight as u64); } #[inline] - fn saturating_sub(&mut self, key: &K, value: &V) { - let weight = self.weigh(key, value); - let total = &mut self.size; + fn saturating_sub(&mut self, weight: u32) { + let total = &mut self.0; *total = total.saturating_sub(weight as u64); } } @@ -390,8 +392,8 @@ impl EntrySizeAndFrequency { } } - fn add_policy_weight(&mut self, ws: &WeightedSize<'_, K, V>, key: &K, value: &V) { - self.weight += ws.weigh(key, value) as u64; + fn add_policy_weight(&mut self, weighted_size: u32) { + self.weight += weighted_size as u64; } fn add_frequency(&mut self, freq: &FrequencySketch, hash: u64) { @@ -590,6 +592,11 @@ where false } + #[inline] + fn weigh(&self, key: &K, value: &V) -> u32 { + self.weigher.as_ref().map(|w| w(key, value)).unwrap_or(1) + } + #[inline] fn current_time_from_expiration_clock(&self) -> Instant { if self.has_expiration_clock.load(Ordering::Relaxed) { @@ -644,11 +651,7 @@ where let mut should_sync = true; let current_ws = self.weighted_size.load(); - let mut ws = WeightedSize { - size: current_ws, - weigher: self.weigher.as_ref(), - }; - + let mut ws = WeightedSize(current_ws); while should_sync && calls <= max_repeats { let r_len = self.read_op_ch.len(); if r_len > 0 { @@ -682,7 +685,7 @@ where } debug_assert_eq!(self.weighted_size.load(), current_ws); - self.weighted_size.store(ws.size); + self.weighted_size.store(ws.0); if should_sync { Some(SyncPace::Fast) @@ -704,9 +707,9 @@ where V: Send + Sync + 'static, S: BuildHasher + Clone + Send + Sync + 'static, { - fn has_enough_capacity(&self, candidate_weight: u32, ws: &WeightedSize<'_, K, V>) -> bool { + fn has_enough_capacity(&self, candidate_weight: u32, ws: &WeightedSize) -> bool { self.max_capacity - .map(|limit| ws.size + candidate_weight as u64 <= limit) + .map(|limit| ws.0 + candidate_weight as u64 <= limit) .unwrap_or(true) } @@ -727,7 +730,7 @@ where } } - fn apply_writes(&self, deqs: &mut Deques, count: usize, ws: &mut WeightedSize<'_, K, V>) { + fn apply_writes(&self, deqs: &mut Deques, count: usize, ws: &mut WeightedSize) { use WriteOp::*; let freq = self.frequency_sketch.read(); let ch = &self.write_op_ch; @@ -735,45 +738,55 @@ where for _ in 0..count { match ch.try_recv() { - Ok(Upsert(kh, entry)) => self.handle_upsert(kh, entry, ts, deqs, &freq, ws), - Ok(Remove(KvEntry { key, entry })) => Self::handle_remove(deqs, &key, entry, ws), + Ok(Insert { + key_hash: kh, + value_entry: entry, + new_weighted_size: new_size, + }) => self.handle_upsert(kh, entry, 0, new_size, ts, deqs, &freq, ws), + Ok(Update { + 
key_hash: kh, + value_entry: entry, + old_weighted_size: old_size, + new_weighted_size: new_size, + }) => self.handle_upsert(kh, entry, old_size, new_size, ts, deqs, &freq, ws), + Ok(Remove(KvEntry { key: _key, entry })) => Self::handle_remove(deqs, entry, ws), Err(_) => break, }; } } + #[allow(clippy::too_many_arguments)] fn handle_upsert( &self, kh: KeyHash, entry: Arc>, + old_policy_weight: u32, + new_policy_weight: u32, timestamp: Instant, deqs: &mut Deques, freq: &FrequencySketch, - ws: &mut WeightedSize<'_, K, V>, + ws: &mut WeightedSize, ) { entry.set_last_accessed(timestamp); entry.set_last_modified(timestamp); - let policy_weight = ws.weigh(&kh.key, &entry.value); - if entry.is_admitted() { - // TODO: Update the total weight. - // The entry has been already admitted, so treat this as an update. + ws.saturating_add(new_policy_weight - old_policy_weight); deqs.move_to_back_ao(&entry); deqs.move_to_back_wo(&entry); return; } - if self.has_enough_capacity(policy_weight, ws) { + if self.has_enough_capacity(new_policy_weight, ws) { // There are enough room in the cache (or the cache is unbounded). // Add the candidate to the deques. - self.handle_admit(kh, &entry, policy_weight, deqs, ws); + self.handle_admit(kh, &entry, new_policy_weight, deqs, ws); return; } if let Some(max) = self.max_capacity { - if policy_weight as u64 > max { + if new_policy_weight as u64 > max { // The candidate is too big to fit in the cache. Reject it. self.cache.remove(&Arc::clone(&kh.key)); return; @@ -781,11 +794,11 @@ where } let skipped_nodes; - let mut candidate = EntrySizeAndFrequency::new(policy_weight); + let mut candidate = EntrySizeAndFrequency::new(new_policy_weight); candidate.add_frequency(freq, kh.hash); // Try to admit the candidate. - match Self::admit(&candidate, &self.cache, deqs, freq, ws) { + match Self::admit(&candidate, &self.cache, deqs, freq) { AdmissionResult::Admitted { victim_nodes, skipped_nodes: mut skipped, @@ -794,12 +807,12 @@ where // Try to remove the victims from the cache (hash map). for victim in victim_nodes { - if let Some((vic_key, vic_entry)) = self + if let Some((_vic_key, vic_entry)) = self .cache .remove_entry(unsafe { &victim.as_ref().element.key }) { // And then remove the victim from the deques. - Self::handle_remove(deqs, &vic_key, vic_entry, ws); + Self::handle_remove(deqs, vic_entry, ws); } else { // Could not remove the victim from the cache. Skip this // victim node as its ValueEntry might have been @@ -810,7 +823,7 @@ where skipped_nodes = skipped; // Add the candidate to the deques. 
- self.handle_admit(kh, &entry, policy_weight, deqs, ws); + self.handle_admit(kh, &entry, new_policy_weight, deqs, ws); } AdmissionResult::Rejected { skipped_nodes: s } => { skipped_nodes = s; @@ -849,7 +862,6 @@ where cache: &CacheStore, deqs: &Deques, freq: &FrequencySketch, - ws: &WeightedSize<'_, K, V>, ) -> AdmissionResult { const MAX_CONSECUTIVE_RETRIES: usize = 5; let mut retries = 0; @@ -870,7 +882,7 @@ where next_victim = victim.next_node(); if let Some(vic_entry) = cache.get(&victim.element.key) { - victims.add_policy_weight(ws, &victim.element.key, &vic_entry.value); + victims.add_policy_weight(vic_entry.weighted_size()); victims.add_frequency(freq, victim.element.hash); victim_nodes.push(NonNull::from(victim)); retries = 0; @@ -911,7 +923,7 @@ where entry: &Arc>, policy_weight: u32, deqs: &mut Deques, - ws: &mut WeightedSize<'_, K, V>, + ws: &mut WeightedSize, ) { let key = Arc::clone(&kh.key); ws.saturating_add(policy_weight); @@ -926,15 +938,10 @@ where entry.set_is_admitted(true); } - fn handle_remove( - deqs: &mut Deques, - key: &Arc, - entry: Arc>, - ws: &mut WeightedSize<'_, K, V>, - ) { + fn handle_remove(deqs: &mut Deques, entry: Arc>, ws: &mut WeightedSize) { if entry.is_admitted() { entry.set_is_admitted(false); - ws.saturating_sub(key, &entry.value); + ws.saturating_sub(entry.weighted_size()); deqs.unlink_ao(&entry); Deques::unlink_wo(&mut deqs.write_order, &entry); } @@ -945,20 +952,19 @@ where ao_deq_name: &str, ao_deq: &mut Deque>, wo_deq: &mut Deque>, - key: &Arc, entry: Arc>, - ws: &mut WeightedSize<'_, K, V>, + ws: &mut WeightedSize, ) { if entry.is_admitted() { entry.set_is_admitted(false); - ws.saturating_sub(key, &entry.value); + ws.saturating_sub(entry.weighted_size()); Deques::unlink_ao_from_deque(ao_deq_name, ao_deq, &entry); Deques::unlink_wo(wo_deq, &entry); } entry.unset_q_nodes(); } - fn evict(&self, deqs: &mut Deques, batch_size: usize, ws: &mut WeightedSize<'_, K, V>) { + fn evict(&self, deqs: &mut Deques, batch_size: usize, ws: &mut WeightedSize) { let now = self.current_time_from_expiration_clock(); if self.is_write_order_queue_enabled() { @@ -990,7 +996,7 @@ where write_order_deq: &mut Deque>, batch_size: usize, now: Instant, - ws: &mut WeightedSize<'_, K, V>, + ws: &mut WeightedSize, ) { let tti = &self.time_to_idle; let va = &self.valid_after(); @@ -1025,7 +1031,7 @@ where .remove_if(key, |_, v| is_expired_entry_ao(tti, va, v, now)); if let Some(entry) = maybe_entry { - Self::handle_remove_with_deques(deq_name, deq, write_order_deq, key, entry, ws); + Self::handle_remove_with_deques(deq_name, deq, write_order_deq, entry, ws); } else if let Some(entry) = self.cache.get(key) { let ts = entry.last_accessed(); if ts.is_none() { @@ -1055,7 +1061,7 @@ where deqs: &mut Deques, batch_size: usize, now: Instant, - ws: &mut WeightedSize<'_, K, V>, + ws: &mut WeightedSize, ) { let ttl = &self.time_to_live; let va = &self.valid_after(); @@ -1086,7 +1092,7 @@ where .remove_if(key, |_, v| is_expired_entry_wo(ttl, va, v, now)); if let Some(entry) = maybe_entry { - Self::handle_remove(deqs, key, entry, ws); + Self::handle_remove(deqs, entry, ws); } else if let Some(entry) = self.cache.get(key) { let ts = entry.last_modified(); if ts.is_none() { @@ -1114,7 +1120,7 @@ where invalidator: &Invalidator, deqs: &mut Deques, batch_size: usize, - ws: &mut WeightedSize<'_, K, V>, + ws: &mut WeightedSize, ) { self.process_invalidation_result(invalidator, deqs, ws); self.submit_invalidation_task(invalidator, &mut deqs.write_order, batch_size); @@ -1124,15 +1130,15 @@ where 
&self, invalidator: &Invalidator, deqs: &mut Deques, - ws: &mut WeightedSize<'_, K, V>, + ws: &mut WeightedSize, ) { if let Some(InvalidationResult { invalidated, is_done, }) = invalidator.task_result() { - for KvEntry { key, entry } in invalidated { - Self::handle_remove(deqs, &key, entry, ws); + for KvEntry { key: _, entry } in invalidated { + Self::handle_remove(deqs, entry, ws); } if is_done { deqs.write_order.reset_cursor(); From 4481394e1a9bab47bc540a4aac097013538cdc98 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 26 Dec 2021 06:11:16 +0800 Subject: [PATCH 29/42] Size-aware cache management Change `WriteOp` back to having `Upsert`. --- src/sync.rs | 7 +------ src/sync/base_cache.rs | 12 ++++-------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index 285d8fe3..8a43f6dc 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -382,12 +382,7 @@ pub(crate) enum ReadOp { } pub(crate) enum WriteOp { - Insert { - key_hash: KeyHash, - value_entry: Arc>, - new_weighted_size: u32, - }, - Update { + Upsert { key_hash: KeyHash, value_entry: Arc>, old_weighted_size: u32, diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 102e0962..7c057daf 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -258,9 +258,10 @@ where let cnt = op_cnt1.fetch_add(1, Ordering::Relaxed); op1 = Some(( cnt, - WriteOp::Insert { + WriteOp::Upsert { key_hash: KeyHash::new(Arc::clone(&key), hash), value_entry: Arc::clone(&entry), + old_weighted_size: 0, new_weighted_size: ws, }, )); @@ -277,7 +278,7 @@ where op2 = Some(( cnt, Arc::clone(old_entry), - WriteOp::Update { + WriteOp::Upsert { key_hash: KeyHash::new(Arc::clone(&key), hash), value_entry: Arc::clone(&entry), old_weighted_size, @@ -738,12 +739,7 @@ where for _ in 0..count { match ch.try_recv() { - Ok(Insert { - key_hash: kh, - value_entry: entry, - new_weighted_size: new_size, - }) => self.handle_upsert(kh, entry, 0, new_size, ts, deqs, &freq, ws), - Ok(Update { + Ok(Upsert { key_hash: kh, value_entry: entry, old_weighted_size: old_size, From a8da8808732968d2ad1136580746a28c7bd3ab70 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 26 Dec 2021 06:58:33 +0800 Subject: [PATCH 30/42] Size-aware cache management Add `evict_lru_entries` to `sync::BaseCache`, which will evict LRU entries when cache's capacity is exceeded by updating existing entries with bigger weighted sizes. --- src/future/cache.rs | 7 ++ src/sync.rs | 4 - src/sync/base_cache.rs | 163 ++++++++++++++++++++++++++++------------- src/sync/cache.rs | 7 ++ 4 files changed, 128 insertions(+), 53 deletions(-) diff --git a/src/future/cache.rs b/src/future/cache.rs index 97f45c17..51fd11d7 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -963,6 +963,7 @@ mod tests { let alice = ("alice", 10); let bob = ("bob", 15); + let bill = ("bill", 20); let cindy = ("cindy", 5); let david = ("david", 15); let dennis = ("dennis", 15); @@ -1016,6 +1017,12 @@ mod tests { assert_eq!(cache.get(&"b"), Some(bob)); assert_eq!(cache.get(&"c"), None); assert_eq!(cache.get(&"d"), Some(dennis)); + + // Update "b" with "bill" (w: 20). This should evict "d" (w: 15). 
+ cache.insert("b", bill).await; + cache.sync(); + assert_eq!(cache.get(&"b"), Some(bill)); + assert_eq!(cache.get(&"d"), None); } #[tokio::test] diff --git a/src/sync.rs b/src/sync.rs index 8a43f6dc..db400184 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -164,10 +164,6 @@ impl KeyDate { &self.key } - pub(crate) fn entry_info(&self) -> &ArcedEntryInfo { - &self.entry_info - } - pub(crate) fn last_modified(&self) -> Option { self.entry_info.last_modified() } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 7c057daf..d360f492 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -669,7 +669,7 @@ where } if self.has_expiry() || self.has_valid_after() { - self.evict(&mut deqs, EVICTION_BATCH_SIZE, &mut ws); + self.evict_expired(&mut deqs, EVICTION_BATCH_SIZE, &mut ws); } if self.invalidator_enabled { @@ -685,6 +685,12 @@ where } } + // Evict if this cache has more entries than its capacity. + let weights_to_evict = self.weights_to_evict(&ws); + if weights_to_evict > 0 { + self.evict_lru_entries(&mut deqs, EVICTION_BATCH_SIZE, weights_to_evict, &mut ws); + } + debug_assert_eq!(self.weighted_size.load(), current_ws); self.weighted_size.store(ws.0); @@ -714,6 +720,12 @@ where .unwrap_or(true) } + fn weights_to_evict(&self, ws: &WeightedSize) -> u64 { + self.max_capacity + .map(|limit| ws.0.saturating_sub(limit)) + .unwrap_or_default() + } + fn apply_reads(&self, deqs: &mut Deques, count: usize) { use ReadOp::*; let mut freq = self.frequency_sketch.write(); @@ -960,7 +972,7 @@ where entry.unset_q_nodes(); } - fn evict(&self, deqs: &mut Deques, batch_size: usize, ws: &mut WeightedSize) { + fn evict_expired(&self, deqs: &mut Deques, batch_size: usize, ws: &mut WeightedSize) { let now = self.current_time_from_expiration_clock(); if self.is_write_order_queue_enabled() { @@ -998,19 +1010,13 @@ where let va = &self.valid_after(); for _ in 0..batch_size { // Peek the front node of the deque and check if it is expired. - let (key, _ts) = deq - .peek_front() - .and_then(|node| { - if is_expired_entry_ao(tti, va, &*node, now) { - Some(( - Some(Arc::clone(node.element.key())), - Some(Arc::clone(node.element.entry_info())), - )) - } else { - None - } - }) - .unwrap_or_default(); + let key = deq.peek_front().and_then(|node| { + if is_expired_entry_ao(tti, va, &*node, now) { + Some(Arc::clone(node.element.key())) + } else { + None + } + }); if key.is_none() { break; @@ -1028,26 +1034,40 @@ where if let Some(entry) = maybe_entry { Self::handle_remove_with_deques(deq_name, deq, write_order_deq, entry, ws); - } else if let Some(entry) = self.cache.get(key) { - let ts = entry.last_accessed(); - if ts.is_none() { - // The key exists and the entry has been updated. - Deques::move_to_back_ao_in_deque(deq_name, deq, &entry); - Deques::move_to_back_wo_in_deque(write_order_deq, &entry); - } else { - // The key exists but something unexpected. Break. - break; - } + } else if !self.try_skip_updated_entry(key, deq_name, deq, write_order_deq) { + break; + } + } + } + + #[inline] + fn try_skip_updated_entry( + &self, + key: &K, + deq_name: &str, + deq: &mut Deque>, + write_order_deq: &mut Deque>, + ) -> bool { + if let Some(entry) = self.cache.get(key) { + if entry.last_accessed().is_none() { + // The key exists and the entry has been updated. + Deques::move_to_back_ao_in_deque(deq_name, deq, &entry); + Deques::move_to_back_wo_in_deque(write_order_deq, &entry); + true } else { - // Skip this entry as the key might have been invalidated. 
Since the - // invalidated ValueEntry (which should be still in the write op - // queue) has a pointer to this node, move the node to the back of - // the deque instead of popping (dropping) it. - if let Some(node) = deq.peek_front() { - let node = NonNull::from(node); - unsafe { deq.move_to_back(node) }; - } + // The key exists but something unexpected. + false } + } else { + // Skip this entry as the key might have been invalidated. Since the + // invalidated ValueEntry (which should be still in the write op + // queue) has a pointer to this node, move the node to the back of + // the deque instead of popping (dropping) it. + if let Some(node) = deq.peek_front() { + let node = NonNull::from(node); + unsafe { deq.move_to_back(node) }; + } + true } } @@ -1062,20 +1082,13 @@ where let ttl = &self.time_to_live; let va = &self.valid_after(); for _ in 0..batch_size { - let (key, _ts) = deqs - .write_order - .peek_front() - .and_then(|node| { - if is_expired_entry_wo(ttl, va, &*node, now) { - Some(( - Some(Arc::clone(node.element.key())), - Some(Arc::clone(node.element.entry_info())), - )) - } else { - None - } - }) - .unwrap_or_default(); + let key = deqs.write_order.peek_front().and_then(|node| { + if is_expired_entry_wo(ttl, va, &*node, now) { + Some(Arc::clone(node.element.key())) + } else { + None + } + }); if key.is_none() { break; @@ -1090,8 +1103,7 @@ where if let Some(entry) = maybe_entry { Self::handle_remove(deqs, entry, ws); } else if let Some(entry) = self.cache.get(key) { - let ts = entry.last_modified(); - if ts.is_none() { + if entry.last_modified().is_none() { deqs.move_to_back_ao(&entry); deqs.move_to_back_wo(&entry); } else { @@ -1177,6 +1189,59 @@ where invalidator.submit_task(candidates, is_truncated); } } + + fn evict_lru_entries( + &self, + deqs: &mut Deques, + batch_size: usize, + weights_to_evict: u64, + ws: &mut WeightedSize, + ) { + const DEQ_NAME: &str = "probation"; + let mut evicted = 0u64; + let (deq, write_order_deq) = (&mut deqs.probation, &mut deqs.write_order); + + for _ in 0..batch_size { + let maybe_key_and_ts = deq.peek_front().map(|node| { + ( + Arc::clone(node.element.key()), + node.element.entry_info().last_modified(), + ) + }); + + let (key, ts) = match maybe_key_and_ts { + Some((key, Some(ts))) => (key, ts), + Some((key, None)) => { + if self.try_skip_updated_entry(&key, DEQ_NAME, deq, write_order_deq) { + continue; + } else { + break; + } + } + None => break, + }; + + let maybe_entry = self.cache.remove_if(&key, |_, v| { + if let Some(lm) = v.last_modified() { + lm == ts + } else { + false + } + }); + + if let Some(entry) = maybe_entry { + let weight = entry.weighted_size(); + Self::handle_remove_with_deques(DEQ_NAME, deq, write_order_deq, entry, ws); + evicted += weight as u64; + } else if !self.try_skip_updated_entry(&key, DEQ_NAME, deq, write_order_deq) { + break; + } + + if evicted >= weights_to_evict { + break; + } + } + } } // diff --git a/src/sync/cache.rs b/src/sync/cache.rs index e2d674c9..5f9a9a33 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -803,6 +803,7 @@ mod tests { let alice = ("alice", 10); let bob = ("bob", 15); + let bill = ("bill", 20); let cindy = ("cindy", 5); let david = ("david", 15); let dennis = ("dennis", 15); @@ -856,6 +857,12 @@ mod tests { assert_eq!(cache.get(&"b"), Some(bob)); assert_eq!(cache.get(&"c"), None); assert_eq!(cache.get(&"d"), Some(dennis)); + + // Update "b" with "bill" (w: 20). This should evict "d" (w: 15). 
+ cache.insert("b", bill); + cache.sync(); + assert_eq!(cache.get(&"b"), Some(bill)); + assert_eq!(cache.get(&"d"), None); } #[test] From 684f9b69eebcb1f30e76fe69ce995fd54ef1b21b Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 26 Dec 2021 08:30:07 +0800 Subject: [PATCH 31/42] Size-aware cache management Add `evict_lru_entries` to `unsync::Cache`, which will evict LRU entries when cache's capacity is exceeded by updating existing entries with bigger weighted sizes. --- src/sync/base_cache.rs | 10 ++-- src/unsync/cache.rs | 120 ++++++++++++++++++++++++++++++++++------- 2 files changed, 105 insertions(+), 25 deletions(-) diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index d360f492..4d036765 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -1202,6 +1202,10 @@ where let (deq, write_order_deq) = (&mut deqs.probation, &mut deqs.write_order); for _ in 0..batch_size { + if evicted >= weights_to_evict { + break; + } + let maybe_key_and_ts = deq.peek_front().map(|node| { ( Arc::clone(node.element.key()), @@ -1232,14 +1236,10 @@ where if let Some(entry) = maybe_entry { let weight = entry.weighted_size(); Self::handle_remove_with_deques(DEQ_NAME, deq, write_order_deq, entry, ws); - evicted += weight as u64; + evicted = evicted.saturating_add(weight as u64); } else if !self.try_skip_updated_entry(&key, DEQ_NAME, deq, write_order_deq) { break; } - - if evicted >= weights_to_evict { - break; - } } } } diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index 3e12caab..9e640cc7 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -16,6 +16,8 @@ use std::{ time::Duration, }; +const EVICTION_BATCH_SIZE: usize = 100; + type CacheStore = std::collections::HashMap, ValueEntry, S>; /// An in-memory cache that is _not_ thread-safe. @@ -193,7 +195,8 @@ where Rc: Borrow, Q: Hash + Eq + ?Sized, { - let timestamp = self.evict_if_needed(); + let timestamp = self.evict_expired_if_needed(); + self.evict_lru_entries(); self.frequency_sketch.increment(self.hash(key)); match (self.cache.get_mut(key), timestamp, &mut self.deques) { @@ -222,7 +225,8 @@ where /// /// If the cache has this key present, the value is updated. pub fn insert(&mut self, key: K, value: V) { - let timestamp = self.evict_if_needed(); + let timestamp = self.evict_expired_if_needed(); + self.evict_lru_entries(); let policy_weight = weigh(&mut self.weigher, &key, &value); let key = Rc::new(key); let entry = ValueEntry::new(value); @@ -245,7 +249,8 @@ where Rc: Borrow, Q: Hash + Eq + ?Sized, { - self.evict_if_needed(); + self.evict_expired_if_needed(); + self.evict_lru_entries(); // TODO: Update the weighted_size. 
if let Some(mut entry) = self.cache.remove(key) { @@ -343,10 +348,10 @@ where } #[inline] - fn evict_if_needed(&mut self) -> Option { + fn evict_expired_if_needed(&mut self) -> Option { if self.has_expiry() { let ts = self.current_time_from_expiration_clock(); - self.evict(ts); + self.evict_expired(ts); Some(ts) } else { None @@ -407,6 +412,12 @@ where .unwrap_or(true) } + fn weights_to_evict(&self) -> u64 { + self.max_capacity + .map(|limit| self.weighted_size.saturating_sub(limit)) + .unwrap_or_default() + } + fn saturating_add_to_total_weight(&mut self, weight: u64) { let total = &mut self.weighted_size; *total = total.saturating_add(weight as u64); @@ -596,27 +607,29 @@ where } let deqs = &mut self.deques; deqs.move_to_back_ao(entry); - deqs.move_to_back_wo(entry); + if self.time_to_live.is_some() { + deqs.move_to_back_wo(entry); + } self.saturating_sub_from_total_weight(old_policy_weight); self.saturating_add_to_total_weight(policy_weight as u64); } - fn evict(&mut self, now: Instant) { - const EVICTION_BATCH_SIZE: usize = 100; - + fn evict_expired(&mut self, now: Instant) { if self.time_to_live.is_some() { - self.remove_expired_wo(EVICTION_BATCH_SIZE, now); + let evicted = self.remove_expired_wo(EVICTION_BATCH_SIZE, now); + self.saturating_sub_from_total_weight(evicted); } if self.time_to_idle.is_some() { let deqs = &mut self.deques; - let (window, probation, protected, wo, cache, time_to_idle) = ( + let (window, probation, protected, wo, cache, weigher, time_to_idle) = ( &mut deqs.window, &mut deqs.probation, &mut deqs.protected, &mut deqs.write_order, &mut self.cache, + &mut self.weigher, &self.time_to_idle, ); @@ -627,18 +640,23 @@ where wo, cache, time_to_idle, + weigher, EVICTION_BATCH_SIZE, now, ) }; - rm_expired_ao("window", window); - rm_expired_ao("probation", probation); - rm_expired_ao("protected", protected); + let evicted1 = rm_expired_ao("window", window); + let evicted2 = rm_expired_ao("probation", probation); + let evicted3 = rm_expired_ao("protected", protected); + + self.saturating_sub_from_total_weight(evicted1); + self.saturating_sub_from_total_weight(evicted2); + self.saturating_sub_from_total_weight(evicted3); } } - // TODO: Update the weighted_size. + #[allow(clippy::too_many_arguments)] #[inline] fn remove_expired_ao( deq_name: &str, @@ -646,9 +664,12 @@ where write_order_deq: &mut Deque>, cache: &mut CacheStore, time_to_idle: &Option, + weigher: &mut Option>, batch_size: usize, now: Instant, - ) { + ) -> u64 { + let mut evicted = 0u64; + for _ in 0..batch_size { let key = deq .peek_front() @@ -665,19 +686,26 @@ where break; } - if let Some(mut entry) = cache.remove(&key.unwrap()) { + let key = key.unwrap(); + + if let Some(mut entry) = cache.remove(&key) { + let weight = weigh(weigher, &key, &entry.value); Deques::unlink_ao_from_deque(deq_name, deq, &mut entry); Deques::unlink_wo(write_order_deq, &mut entry); + evicted = evicted.saturating_add(weight as u64); } else { deq.pop_front(); } } + + evicted } - // TODO: Update the weighted_size. 
#[inline] - fn remove_expired_wo(&mut self, batch_size: usize, now: Instant) { + fn remove_expired_wo(&mut self, batch_size: usize, now: Instant) -> u64 { + let mut evicted = 0u64; let time_to_live = &self.time_to_live; + for _ in 0..batch_size { let key = self .deques @@ -696,13 +724,59 @@ where break; } - if let Some(mut entry) = self.cache.remove(&key.unwrap()) { + let key = key.unwrap(); + + if let Some(mut entry) = self.cache.remove(&key) { + let weight = weigh(&mut self.weigher, &key, &entry.value); self.deques.unlink_ao(&mut entry); Deques::unlink_wo(&mut self.deques.write_order, &mut entry); + evicted = evicted.saturating_sub(weight as u64); } else { self.deques.write_order.pop_front(); } } + + evicted + } + + #[inline] + fn evict_lru_entries(&mut self) { + const DEQ_NAME: &str = "probation"; + + let weights_to_evict = self.weights_to_evict(); + let mut evicted = 0u64; + + { + let deqs = &mut self.deques; + let (probation, wo, cache) = + (&mut deqs.probation, &mut deqs.write_order, &mut self.cache); + + for _ in 0..EVICTION_BATCH_SIZE { + if evicted >= weights_to_evict { + break; + } + + let key = probation + .peek_front() + .map(|node| Rc::clone(&node.element.key)); + + if key.is_none() { + break; + } + let key = key.unwrap(); + + if let Some(mut entry) = cache.remove(&key) { + let weight = weigh(&mut self.weigher, &key, &entry.value); + Deques::unlink_ao_from_deque(DEQ_NAME, probation, &mut entry); + Deques::unlink_wo(wo, &mut entry); + evicted = evicted.saturating_add(weight as u64); + } else { + probation.pop_front(); + } + } + } + + self.saturating_sub_from_total_weight(evicted); } } @@ -813,6 +887,7 @@ mod tests { let alice = ("alice", 10); let bob = ("bob", 15); + let bill = ("bill", 20); let cindy = ("cindy", 5); let david = ("david", 15); let dennis = ("dennis", 15); @@ -854,6 +929,11 @@ mod tests { assert_eq!(cache.get(&"b"), Some(&bob)); assert_eq!(cache.get(&"c"), None); assert_eq!(cache.get(&"d"), Some(&dennis)); + + // Update "b" with "bill" (w: 20). This should evict "d" (w: 15). + cache.insert("b", bill); + assert_eq!(cache.get(&"b"), Some(&bill)); + assert_eq!(cache.get(&"d"), None); } #[test] From 61fcc5f771a98861c9a48c88ed42ad6bc47ff86d Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 26 Dec 2021 19:18:50 +0800 Subject: [PATCH 32/42] Size-aware cache management Rename `weighted_size` in `ValueEntry` to `policy_weight`. 
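
The renamed field holds the cost that the eviction policy charges for an entry: 1 when no weigher is configured, otherwise whatever the user-supplied weigher closure returns (see the `Inner::weigh` helper added earlier in this series). A minimal standalone sketch of that rule follows; the `Weigher` alias and `policy_weight` function here are illustrative only, not the crate's exact types:

    // Illustrative only: the policy charges 1 per entry unless a user-supplied
    // weigher says otherwise.
    type Weigher<K, V> = Box<dyn Fn(&K, &V) -> u32 + Send + Sync>;

    fn policy_weight<K, V>(weigher: &Option<Weigher<K, V>>, key: &K, value: &V) -> u32 {
        weigher.as_ref().map(|w| w(key, value)).unwrap_or(1)
    }

    fn main() {
        // No weigher: every entry weighs 1 (plain entry-count capacity).
        let none: Option<Weigher<&str, String>> = None;
        assert_eq!(policy_weight(&none, &"k", &"hello".to_string()), 1);

        // With a weigher, e.g. weight = value length in bytes.
        let by_len: Option<Weigher<&str, String>> = Some(Box::new(|_k, v| v.len() as u32));
        assert_eq!(policy_weight(&by_len, &"k", &"hello".to_string()), 5);
    }
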
--- src/sync.rs | 34 +++++++++++++------------- src/sync/base_cache.rs | 54 ++++++++++++++++++++---------------------- 2 files changed, 43 insertions(+), 45 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index db400184..2e5b42f7 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -53,24 +53,24 @@ pub(crate) trait EntryInfo: AccessTime { fn is_admitted(&self) -> bool; fn set_is_admitted(&self, value: bool); fn reset_timestamps(&self); - fn weighted_size(&self) -> u32; - fn set_weighted_size(&self, size: u32); + fn policy_weight(&self) -> u32; + fn set_policy_weight(&self, size: u32); } pub(crate) struct EntryInfoFull { is_admitted: AtomicBool, last_accessed: AtomicInstant, last_modified: AtomicInstant, - weighted_size: AtomicU32, + policy_weight: AtomicU32, } impl EntryInfoFull { - fn new(weighted_size: u32) -> Self { + fn new(policy_weight: u32) -> Self { Self { is_admitted: Default::default(), last_accessed: Default::default(), last_modified: Default::default(), - weighted_size: AtomicU32::new(weighted_size), + policy_weight: AtomicU32::new(policy_weight), } } } @@ -93,13 +93,13 @@ impl EntryInfo for EntryInfoFull { } #[inline] - fn weighted_size(&self) -> u32 { - self.weighted_size.load(Ordering::Acquire) + fn policy_weight(&self) -> u32 { + self.policy_weight.load(Ordering::Acquire) } #[inline] - fn set_weighted_size(&self, size: u32) { - self.weighted_size.store(size, Ordering::Release); + fn set_policy_weight(&self, size: u32) { + self.policy_weight.store(size, Ordering::Release); } } @@ -225,10 +225,10 @@ pub(crate) struct ValueEntry { } impl ValueEntry { - pub(crate) fn new(value: V, weighted_size: u32) -> Self { + pub(crate) fn new(value: V, policy_weight: u32) -> Self { Self { value, - info: Arc::new(EntryInfoFull::new(weighted_size)), + info: Arc::new(EntryInfoFull::new(policy_weight)), nodes: Mutex::new(DeqNodes { access_order_q_node: None, write_order_q_node: None, @@ -236,7 +236,7 @@ impl ValueEntry { } } - pub(crate) fn new_with(value: V, weighted_size: u32, other: &Self) -> Self { + pub(crate) fn new_with(value: V, policy_weight: u32, other: &Self) -> Self { let nodes = { let other_nodes = other.nodes.lock(); DeqNodes { @@ -245,7 +245,7 @@ impl ValueEntry { } }; let info = Arc::clone(&other.info); - info.set_weighted_size(weighted_size); + info.set_policy_weight(policy_weight); // To prevent this updated ValueEntry from being evicted by an expiration policy, // set the max value to the timestamps. They will be replaced with the real // timestamps when applying writes. @@ -270,8 +270,8 @@ impl ValueEntry { } #[inline] - pub(crate) fn weighted_size(&self) -> u32 { - self.info.weighted_size() + pub(crate) fn policy_weight(&self) -> u32 { + self.info.policy_weight() } pub(crate) fn access_order_q_node(&self) -> Option> { @@ -381,8 +381,8 @@ pub(crate) enum WriteOp { Upsert { key_hash: KeyHash, value_entry: Arc>, - old_weighted_size: u32, - new_weighted_size: u32, + old_weight: u32, + new_weight: u32, }, Remove(KvEntry), } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 4d036765..19526fca 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -261,8 +261,8 @@ where WriteOp::Upsert { key_hash: KeyHash::new(Arc::clone(&key), hash), value_entry: Arc::clone(&entry), - old_weighted_size: 0, - new_weighted_size: ws, + old_weight: 0, + new_weight: ws, }, )); entry @@ -272,7 +272,7 @@ where // NOTE: `new_with` sets the max value to the last_accessed and last_modified // to prevent this updated ValueEntry from being evicted by an expiration policy. 
// See the comments in `new_with` for more details. - let old_weighted_size = old_entry.weighted_size(); + let old_weight = old_entry.policy_weight(); let entry = Arc::new(ValueEntry::new_with(value.clone(), ws, old_entry)); let cnt = op_cnt2.fetch_add(1, Ordering::Relaxed); op2 = Some(( @@ -281,8 +281,8 @@ where WriteOp::Upsert { key_hash: KeyHash::new(Arc::clone(&key), hash), value_entry: Arc::clone(&entry), - old_weighted_size, - new_weighted_size: ws, + old_weight, + new_weight: ws, }, )); entry @@ -381,20 +381,20 @@ impl WeightedSize { #[derive(Default)] struct EntrySizeAndFrequency { - weight: u64, + policy_weight: u64, freq: u32, } impl EntrySizeAndFrequency { fn new(policy_weight: u32) -> Self { Self { - weight: policy_weight as u64, + policy_weight: policy_weight as u64, ..Default::default() } } - fn add_policy_weight(&mut self, weighted_size: u32) { - self.weight += weighted_size as u64; + fn add_policy_weight(&mut self, weight: u32) { + self.policy_weight += weight as u64; } fn add_frequency(&mut self, freq: &FrequencySketch, hash: u64) { @@ -754,9 +754,9 @@ where Ok(Upsert { key_hash: kh, value_entry: entry, - old_weighted_size: old_size, - new_weighted_size: new_size, - }) => self.handle_upsert(kh, entry, old_size, new_size, ts, deqs, &freq, ws), + old_weight, + new_weight, + }) => self.handle_upsert(kh, entry, old_weight, new_weight, ts, deqs, &freq, ws), Ok(Remove(KvEntry { key: _key, entry })) => Self::handle_remove(deqs, entry, ws), Err(_) => break, }; @@ -768,8 +768,8 @@ where &self, kh: KeyHash, entry: Arc>, - old_policy_weight: u32, - new_policy_weight: u32, + old_weight: u32, + new_weight: u32, timestamp: Instant, deqs: &mut Deques, freq: &FrequencySketch, @@ -780,21 +780,21 @@ where if entry.is_admitted() { // The entry has been already admitted, so treat this as an update. - ws.saturating_add(new_policy_weight - old_policy_weight); + ws.saturating_add(new_weight - old_weight); deqs.move_to_back_ao(&entry); deqs.move_to_back_wo(&entry); return; } - if self.has_enough_capacity(new_policy_weight, ws) { + if self.has_enough_capacity(new_weight, ws) { // There are enough room in the cache (or the cache is unbounded). // Add the candidate to the deques. - self.handle_admit(kh, &entry, new_policy_weight, deqs, ws); + self.handle_admit(kh, &entry, new_weight, deqs, ws); return; } if let Some(max) = self.max_capacity { - if new_policy_weight as u64 > max { + if new_weight as u64 > max { // The candidate is too big to fit in the cache. Reject it. self.cache.remove(&Arc::clone(&kh.key)); return; @@ -802,7 +802,7 @@ where } let skipped_nodes; - let mut candidate = EntrySizeAndFrequency::new(new_policy_weight); + let mut candidate = EntrySizeAndFrequency::new(new_weight); candidate.add_frequency(freq, kh.hash); // Try to admit the candidate. @@ -811,8 +811,6 @@ where victim_nodes, skipped_nodes: mut skipped, } => { - // TODO: Try not to recalculate weights in handle_remove and handle_admit. - // Try to remove the victims from the cache (hash map). for victim in victim_nodes { if let Some((_vic_key, vic_entry)) = self @@ -831,7 +829,7 @@ where skipped_nodes = skipped; // Add the candidate to the deques. - self.handle_admit(kh, &entry, new_policy_weight, deqs, ws); + self.handle_admit(kh, &entry, new_weight, deqs, ws); } AdmissionResult::Rejected { skipped_nodes: s } => { skipped_nodes = s; @@ -882,7 +880,7 @@ where let mut next_victim = deqs.probation.peek_front(); // Aggregate potential victims. 
- while victims.weight < candidate.weight { + while victims.policy_weight < candidate.policy_weight { if candidate.freq < victims.freq { break; } @@ -890,7 +888,7 @@ where next_victim = victim.next_node(); if let Some(vic_entry) = cache.get(&victim.element.key) { - victims.add_policy_weight(vic_entry.weighted_size()); + victims.add_policy_weight(vic_entry.policy_weight()); victims.add_frequency(freq, victim.element.hash); victim_nodes.push(NonNull::from(victim)); retries = 0; @@ -915,7 +913,7 @@ where // TODO: Implement some randomness to mitigate hash DoS attack. // See Caffeine's implementation. - if victims.weight >= candidate.weight && candidate.freq > victims.freq { + if victims.policy_weight >= candidate.policy_weight && candidate.freq > victims.freq { AdmissionResult::Admitted { victim_nodes, skipped_nodes, @@ -949,7 +947,7 @@ where fn handle_remove(deqs: &mut Deques, entry: Arc>, ws: &mut WeightedSize) { if entry.is_admitted() { entry.set_is_admitted(false); - ws.saturating_sub(entry.weighted_size()); + ws.saturating_sub(entry.policy_weight()); deqs.unlink_ao(&entry); Deques::unlink_wo(&mut deqs.write_order, &entry); } @@ -965,7 +963,7 @@ where ) { if entry.is_admitted() { entry.set_is_admitted(false); - ws.saturating_sub(entry.weighted_size()); + ws.saturating_sub(entry.policy_weight()); Deques::unlink_ao_from_deque(ao_deq_name, ao_deq, &entry); Deques::unlink_wo(wo_deq, &entry); } @@ -1234,7 +1232,7 @@ where }); if let Some(entry) = maybe_entry { - let weight = entry.weighted_size(); + let weight = entry.policy_weight(); Self::handle_remove_with_deques(DEQ_NAME, deq, write_order_deq, entry, ws); evicted = evicted.saturating_add(weight as u64); } else if !self.try_skip_updated_entry(&key, DEQ_NAME, deq, write_order_deq) { From e0b771d6dd5f4e86f1b2ae49227d398511451723 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Sun, 26 Dec 2021 20:53:52 +0800 Subject: [PATCH 33/42] Size-aware cache management Add `sync::ValueEntryBuilder` for `sync::BaseCache`. --- src/sync.rs | 228 ++++++++++++++++++----------------------- src/sync/base_cache.rs | 43 +++++++- src/sync/entry_info.rs | 149 +++++++++++++++++++++++++++ 3 files changed, 287 insertions(+), 133 deletions(-) create mode 100644 src/sync/entry_info.rs diff --git a/src/sync.rs b/src/sync.rs index 2e5b42f7..13212271 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -1,20 +1,15 @@ //! Provides thread-safe, blocking cache implementations. -use crate::common::{atomic_time::AtomicInstant, deque::DeqNode, time::Instant}; +use crate::common::{deque::DeqNode, time::Instant}; use parking_lot::Mutex; -use std::{ - ptr::NonNull, - sync::{ - atomic::{AtomicBool, AtomicU32, Ordering}, - Arc, - }, -}; +use std::{marker::PhantomData, ptr::NonNull, sync::Arc}; pub(crate) mod base_cache; mod builder; mod cache; mod deques; +mod entry_info; pub(crate) mod housekeeper; mod invalidator; mod segment; @@ -24,6 +19,8 @@ pub use builder::CacheBuilder; pub use cache::Cache; pub use segment::SegmentedCache; +use self::entry_info::{ArcEntryInfo, EntryInfo, EntryInfoFull, EntryInfoWo}; + /// The type of the unique ID to identify a predicate used by /// [`Cache#invalidate_entries_if`][invalidate-if] method. 
/// @@ -49,84 +46,6 @@ pub(crate) trait AccessTime { fn set_last_modified(&self, timestamp: Instant); } -pub(crate) trait EntryInfo: AccessTime { - fn is_admitted(&self) -> bool; - fn set_is_admitted(&self, value: bool); - fn reset_timestamps(&self); - fn policy_weight(&self) -> u32; - fn set_policy_weight(&self, size: u32); -} - -pub(crate) struct EntryInfoFull { - is_admitted: AtomicBool, - last_accessed: AtomicInstant, - last_modified: AtomicInstant, - policy_weight: AtomicU32, -} - -impl EntryInfoFull { - fn new(policy_weight: u32) -> Self { - Self { - is_admitted: Default::default(), - last_accessed: Default::default(), - last_modified: Default::default(), - policy_weight: AtomicU32::new(policy_weight), - } - } -} - -impl EntryInfo for EntryInfoFull { - #[inline] - fn is_admitted(&self) -> bool { - self.is_admitted.load(Ordering::Acquire) - } - - #[inline] - fn set_is_admitted(&self, value: bool) { - self.is_admitted.store(value, Ordering::Release); - } - - #[inline] - fn reset_timestamps(&self) { - self.last_accessed.reset(); - self.last_modified.reset(); - } - - #[inline] - fn policy_weight(&self) -> u32 { - self.policy_weight.load(Ordering::Acquire) - } - - #[inline] - fn set_policy_weight(&self, size: u32) { - self.policy_weight.store(size, Ordering::Release); - } -} - -impl AccessTime for EntryInfoFull { - #[inline] - fn last_accessed(&self) -> Option { - self.last_accessed.instant() - } - - #[inline] - fn set_last_accessed(&self, timestamp: Instant) { - self.last_accessed.set_instant(timestamp); - } - - #[inline] - fn last_modified(&self) -> Option { - self.last_modified.instant() - } - - #[inline] - fn set_last_modified(&self, timestamp: Instant) { - self.last_modified.set_instant(timestamp); - } -} - -pub(crate) type ArcedEntryInfo = Arc; - pub(crate) struct KeyHash { pub(crate) key: Arc, pub(crate) hash: u64, @@ -149,11 +68,11 @@ impl Clone for KeyHash { pub(crate) struct KeyDate { key: Arc, - entry_info: ArcedEntryInfo, + entry_info: ArcEntryInfo, } impl KeyDate { - pub(crate) fn new(key: Arc, entry_info: &ArcedEntryInfo) -> Self { + pub(crate) fn new(key: Arc, entry_info: &ArcEntryInfo) -> Self { Self { key, entry_info: Arc::clone(entry_info), @@ -176,7 +95,7 @@ pub(crate) struct KeyHashDate { } impl KeyHashDate { - pub(crate) fn new(kh: KeyHash, entry_info: &ArcedEntryInfo) -> Self { + pub(crate) fn new(kh: KeyHash, entry_info: &ArcEntryInfo) -> Self { Self { key: kh.key, hash: kh.hash, @@ -188,7 +107,7 @@ impl KeyHashDate { &self.key } - pub(crate) fn entry_info(&self) -> &ArcedEntryInfo { + pub(crate) fn entry_info(&self) -> &ArcEntryInfo { &self.entry_info } } @@ -204,6 +123,50 @@ impl KvEntry { } } +impl AccessTime for DeqNode> { + #[inline] + fn last_accessed(&self) -> Option { + None + } + + #[inline] + fn set_last_accessed(&self, _timestamp: Instant) { + unreachable!(); + } + + #[inline] + fn last_modified(&self) -> Option { + self.element.entry_info.last_modified() + } + + #[inline] + fn set_last_modified(&self, timestamp: Instant) { + self.element.entry_info.set_last_modified(timestamp); + } +} + +impl AccessTime for DeqNode> { + #[inline] + fn last_accessed(&self) -> Option { + self.element.entry_info.last_accessed() + } + + #[inline] + fn set_last_accessed(&self, timestamp: Instant) { + self.element.entry_info.set_last_accessed(timestamp); + } + + #[inline] + fn last_modified(&self) -> Option { + None + } + + #[inline] + fn set_last_modified(&self, _timestamp: Instant) { + unreachable!(); + } +} + // DeqNode for an access order queue. 
type KeyDeqNodeAo = NonNull>>; @@ -220,15 +183,15 @@ unsafe impl Send for DeqNodes {} pub(crate) struct ValueEntry { pub(crate) value: V, - info: ArcedEntryInfo, + info: ArcEntryInfo, nodes: Mutex>, } impl ValueEntry { - pub(crate) fn new(value: V, policy_weight: u32) -> Self { + fn new(value: V, entry_info: ArcEntryInfo) -> Self { Self { value, - info: Arc::new(EntryInfoFull::new(policy_weight)), + info: entry_info, nodes: Mutex::new(DeqNodes { access_order_q_node: None, write_order_q_node: None, @@ -236,7 +199,7 @@ impl ValueEntry { } } - pub(crate) fn new_with(value: V, policy_weight: u32, other: &Self) -> Self { + fn new_from(value: V, entry_info: ArcEntryInfo, other: &Self) -> Self { let nodes = { let other_nodes = other.nodes.lock(); DeqNodes { @@ -244,20 +207,18 @@ impl ValueEntry { write_order_q_node: other_nodes.write_order_q_node, } }; - let info = Arc::clone(&other.info); - info.set_policy_weight(policy_weight); // To prevent this updated ValueEntry from being evicted by an expiration policy, // set the max value to the timestamps. They will be replaced with the real // timestamps when applying writes. - info.reset_timestamps(); + entry_info.reset_timestamps(); Self { value, - info, + info: entry_info, nodes: Mutex::new(nodes), } } - pub(crate) fn entry_info(&self) -> &ArcedEntryInfo { + pub(crate) fn entry_info(&self) -> &ArcEntryInfo { &self.info } @@ -327,47 +288,58 @@ impl AccessTime for Arc> { } } -impl AccessTime for DeqNode> { - #[inline] - fn last_accessed(&self) -> Option { - None - } +pub(crate) trait ValueEntryBuilder { + fn build(&self, value: V, policy_weight: u32) -> ValueEntry; - #[inline] - fn set_last_accessed(&self, _timestamp: Instant) { - unreachable!(); - } + fn build_from( + &self, + value: V, + policy_weight: u32, + other: &ValueEntry, + ) -> ValueEntry; +} - #[inline] - fn last_modified(&self) -> Option { - self.element.entry_info.last_modified() - } +pub(crate) struct ValueEntryBuilderImpl(PhantomData<(K, V, EI)>); - #[inline] - fn set_last_modified(&self, timestamp: Instant) { - self.element.entry_info.set_last_modified(timestamp); +impl ValueEntryBuilderImpl { + pub(crate) fn new() -> Self { + Self(PhantomData::default()) } } -impl AccessTime for DeqNode> { - #[inline] - fn last_accessed(&self) -> Option { - self.element.entry_info.last_accessed() +impl ValueEntryBuilder for ValueEntryBuilderImpl { + fn build(&self, value: V, policy_weight: u32) -> ValueEntry { + let info = Arc::new(EntryInfoFull::new(policy_weight)); + ValueEntry::new(value, info) } - #[inline] - fn set_last_accessed(&self, timestamp: Instant) { - self.element.entry_info.set_last_accessed(timestamp); + fn build_from( + &self, + value: V, + policy_weight: u32, + other: &ValueEntry, + ) -> ValueEntry { + let info = Arc::clone(&other.info); + info.set_policy_weight(policy_weight); + ValueEntry::new_from(value, info, other) } +} - #[inline] - fn last_modified(&self) -> Option { - None +impl ValueEntryBuilder for ValueEntryBuilderImpl { + fn build(&self, value: V, policy_weight: u32) -> ValueEntry { + let info = Arc::new(EntryInfoWo::new(policy_weight)); + ValueEntry::new(value, info) } - #[inline] - fn set_last_modified(&self, _timestamp: Instant) { - unreachable!(); + fn build_from( + &self, + value: V, + policy_weight: u32, + other: &ValueEntry, + ) -> ValueEntry { + let info = Arc::clone(&other.info); + info.set_policy_weight(policy_weight); + ValueEntry::new_from(value, info, other) } } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 19526fca..30cd55aa 100644 --- 
a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -1,9 +1,10 @@ use super::{ deques::Deques, + entry_info::EntryInfoWo, housekeeper::{Housekeeper, InnerSync, SyncPace}, invalidator::{GetOrRemoveEntry, InvalidationResult, Invalidator, KeyDateLite, PredicateFun}, - AccessTime, KeyDate, KeyHash, KeyHashDate, KvEntry, PredicateId, ReadOp, ValueEntry, Weigher, - WriteOp, + AccessTime, EntryInfoFull, KeyDate, KeyHash, KeyHashDate, KvEntry, PredicateId, ReadOp, + ValueEntry, ValueEntryBuilder, ValueEntryBuilderImpl, Weigher, WriteOp, }; use crate::{ common::{ @@ -254,7 +255,8 @@ where Arc::clone(&key), // on_insert || { - let entry = Arc::new(ValueEntry::new(value.clone(), ws)); + // let entry = Arc::new(ValueEntry::new(value.clone(), ws)); + let entry = self.new_value_entry(value.clone(), ws); let cnt = op_cnt1.fetch_add(1, Ordering::Relaxed); op1 = Some(( cnt, @@ -273,7 +275,8 @@ where // to prevent this updated ValueEntry from being evicted by an expiration policy. // See the comments in `new_with` for more details. let old_weight = old_entry.policy_weight(); - let entry = Arc::new(ValueEntry::new_with(value.clone(), ws, old_entry)); + // let entry = Arc::new(ValueEntry::new_with(value.clone(), ws, old_entry)); + let entry = self.new_value_entry_from(value.clone(), ws, old_entry); let cnt = op_cnt2.fetch_add(1, Ordering::Relaxed); op2 = Some(( cnt, @@ -307,6 +310,25 @@ where } } + #[inline] + fn new_value_entry(&self, value: V, policy_weight: u32) -> Arc> { + Arc::new(self.inner.value_entry_builder.build(value, policy_weight)) + } + + #[inline] + fn new_value_entry_from( + &self, + value: V, + policy_weight: u32, + other: &ValueEntry, + ) -> Arc> { + Arc::new( + self.inner + .value_entry_builder + .build_from(value, policy_weight, other), + ) + } + #[inline] fn apply_reads_if_needed(&self) { let len = self.read_op_ch.len(); @@ -419,11 +441,14 @@ type CacheStore = moka_cht::SegmentedHashMap, Arc = (Arc, Arc>); +type ArcValueEntryBuilder = Arc + Send + Sync + 'static>; + pub(crate) struct Inner { max_capacity: Option, weighted_size: AtomicCell, cache: CacheStore, build_hasher: S, + value_entry_builder: ArcValueEntryBuilder, deques: Mutex>, frequency_sketch: RwLock, read_op_ch: Receiver>, @@ -441,7 +466,8 @@ pub(crate) struct Inner { // functions/methods used by BaseCache impl Inner where - K: Hash + Eq, + K: Hash + Eq + Send + Sync + 'static, + V: Send + Sync + 'static, S: BuildHasher + Clone, { // Disable a Clippy warning for having more than seven arguments. @@ -468,6 +494,12 @@ where build_hasher.clone(), ); + let value_entry_builder: ArcValueEntryBuilder = if weigher.is_some() { + Arc::new(ValueEntryBuilderImpl::::new()) + } else { + Arc::new(ValueEntryBuilderImpl::::new()) + }; + // Ensure skt_capacity fits in a range of `128u32..=u32::MAX`. let skt_capacity = max_capacity .map(|n| n.try_into().unwrap_or(u32::MAX)) // Convert to u32. 
@@ -480,6 +512,7 @@ where weighted_size: AtomicCell::default(), cache, build_hasher, + value_entry_builder, deques: Mutex::new(Deques::default()), frequency_sketch: RwLock::new(frequency_sketch), read_op_ch, diff --git a/src/sync/entry_info.rs b/src/sync/entry_info.rs new file mode 100644 index 00000000..2f6c8cd3 --- /dev/null +++ b/src/sync/entry_info.rs @@ -0,0 +1,149 @@ +use std::sync::{ + atomic::{AtomicBool, AtomicU32, Ordering}, + Arc, +}; + +use super::AccessTime; +use crate::common::{atomic_time::AtomicInstant, time::Instant}; + +pub(crate) trait EntryInfo: AccessTime { + fn is_admitted(&self) -> bool; + fn set_is_admitted(&self, value: bool); + fn reset_timestamps(&self); + fn policy_weight(&self) -> u32; + fn set_policy_weight(&self, size: u32); +} + +pub(crate) type ArcEntryInfo = Arc; + +pub(crate) struct EntryInfoFull { + is_admitted: AtomicBool, + last_accessed: AtomicInstant, + last_modified: AtomicInstant, + policy_weight: AtomicU32, +} + +impl EntryInfoFull { + pub(crate) fn new(policy_weight: u32) -> Self { + Self { + is_admitted: Default::default(), + last_accessed: Default::default(), + last_modified: Default::default(), + policy_weight: AtomicU32::new(policy_weight), + } + } +} + +impl EntryInfo for EntryInfoFull { + #[inline] + fn is_admitted(&self) -> bool { + self.is_admitted.load(Ordering::Acquire) + } + + #[inline] + fn set_is_admitted(&self, value: bool) { + self.is_admitted.store(value, Ordering::Release); + } + + #[inline] + fn reset_timestamps(&self) { + self.last_accessed.reset(); + self.last_modified.reset(); + } + + #[inline] + fn policy_weight(&self) -> u32 { + self.policy_weight.load(Ordering::Acquire) + } + + #[inline] + fn set_policy_weight(&self, size: u32) { + self.policy_weight.store(size, Ordering::Release); + } +} + +impl AccessTime for EntryInfoFull { + #[inline] + fn last_accessed(&self) -> Option { + self.last_accessed.instant() + } + + #[inline] + fn set_last_accessed(&self, timestamp: Instant) { + self.last_accessed.set_instant(timestamp); + } + + #[inline] + fn last_modified(&self) -> Option { + self.last_modified.instant() + } + + #[inline] + fn set_last_modified(&self, timestamp: Instant) { + self.last_modified.set_instant(timestamp); + } +} + +pub(crate) struct EntryInfoWo { + is_admitted: AtomicBool, + last_accessed: AtomicInstant, + last_modified: AtomicInstant, +} + +impl EntryInfoWo { + pub(crate) fn new(_policy_weight: u32) -> Self { + Self { + is_admitted: Default::default(), + last_accessed: Default::default(), + last_modified: Default::default(), + } + } +} + +impl EntryInfo for EntryInfoWo { + #[inline] + fn is_admitted(&self) -> bool { + self.is_admitted.load(Ordering::Acquire) + } + + #[inline] + fn set_is_admitted(&self, value: bool) { + self.is_admitted.store(value, Ordering::Release); + } + + #[inline] + fn reset_timestamps(&self) { + self.last_accessed.reset(); + self.last_modified.reset(); + } + + #[inline] + fn policy_weight(&self) -> u32 { + 1 + } + + #[inline] + fn set_policy_weight(&self, _size: u32) {} +} + +impl AccessTime for EntryInfoWo { + #[inline] + fn last_accessed(&self) -> Option { + self.last_accessed.instant() + } + + #[inline] + fn set_last_accessed(&self, timestamp: Instant) { + self.last_accessed.set_instant(timestamp); + } + + #[inline] + fn last_modified(&self) -> Option { + self.last_modified.instant() + } + + #[inline] + fn set_last_modified(&self, timestamp: Instant) { + self.last_modified.set_instant(timestamp); + } +} From 23985d4ac13a3eaeaac85cad004ba63e733e33fe Mon Sep 17 00:00:00 2001 From: 
Tatsuya Kawano Date: Wed, 29 Dec 2021 21:11:43 +0800 Subject: [PATCH 34/42] Size-aware cache management Some clean up --- src/sync.rs | 2 +- src/sync/base_cache.rs | 10 +++++----- src/unsync/cache.rs | 21 --------------------- 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index 13212271..f37e92f5 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -91,7 +91,7 @@ impl KeyDate { pub(crate) struct KeyHashDate { key: Arc, hash: u64, - entry_info: Arc, + entry_info: ArcEntryInfo, } impl KeyHashDate { diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 30cd55aa..27873e47 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -441,14 +441,14 @@ type CacheStore = moka_cht::SegmentedHashMap, Arc = (Arc, Arc>); -type ArcValueEntryBuilder = Arc + Send + Sync + 'static>; +type BoxedValueEntryBuilder = Box + Send + Sync + 'static>; pub(crate) struct Inner { max_capacity: Option, weighted_size: AtomicCell, cache: CacheStore, build_hasher: S, - value_entry_builder: ArcValueEntryBuilder, + value_entry_builder: BoxedValueEntryBuilder, deques: Mutex>, frequency_sketch: RwLock, read_op_ch: Receiver>, @@ -494,10 +494,10 @@ where build_hasher.clone(), ); - let value_entry_builder: ArcValueEntryBuilder = if weigher.is_some() { - Arc::new(ValueEntryBuilderImpl::::new()) + let value_entry_builder: BoxedValueEntryBuilder = if weigher.is_some() { + Box::new(ValueEntryBuilderImpl::::new()) } else { - Arc::new(ValueEntryBuilderImpl::::new()) + Box::new(ValueEntryBuilderImpl::::new()) }; // Ensure skt_capacity fits in a range of `128u32..=u32::MAX`. diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index 9e640cc7..745dddb2 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -504,27 +504,6 @@ where } } - // #[inline] - // fn find_cache_victim<'a>( - // deqs: &'a mut Deques, - // _freq: &FrequencySketch, - // ) -> &'a DeqNode> { - // // TODO: Check its frequency. If it is not very low, maybe we should - // // check frequencies of next few others and pick from them. - // deqs.probation.peek_front().expect("No victim found") - // } - - // #[inline] - // fn admit( - // candidate_hash: u64, - // victim: &DeqNode>, - // freq: &FrequencySketch, - // ) -> bool { - // // TODO: Implement some randomness to mitigate hash DoS attack. - // // See Caffeine's implementation. - // freq.frequency(candidate_hash) > freq.frequency(victim.element.hash) - // } - /// Performs size-aware admission explained in the paper: /// [Lightweight Robust Size Aware Cache Management][size-aware-cache-paper] /// by Gil Einziger, Ohad Eytan, Roy Friedman, Ben Manes. From 9c8d50d3a20241240381a79029aab014e0f09298 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Thu, 30 Dec 2021 10:38:20 +0800 Subject: [PATCH 35/42] Size-aware cache management Change `sync::ValueEntryBuilder` and `sync::entry_info::EntryInfo` from dynamic dispatch (by trait objects) to enum dispatch. A simple benchmark showed enum dispatch is slightly (a few %) faster than dynamic dispatch in our use case. 
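
For readers weighing the trade-off, the difference is a vtable call through a trait object versus a plain `match` that the compiler can usually inline. The types below are invented purely for illustration (the real types changed by this patch are `EntryInfo` and `ValueEntryBuilder`):

    // Dynamic dispatch: the concrete type is erased behind a trait object and
    // every call goes through a vtable.
    trait PolicyWeightDyn {
        fn policy_weight(&self) -> u32;
    }

    struct PlainDyn;
    struct WeightedDyn(u32);

    impl PolicyWeightDyn for PlainDyn {
        fn policy_weight(&self) -> u32 { 1 }
    }
    impl PolicyWeightDyn for WeightedDyn {
        fn policy_weight(&self) -> u32 { self.0 }
    }

    fn weight_via_trait_object(info: &dyn PolicyWeightDyn) -> u32 {
        info.policy_weight()
    }

    // Enum dispatch: the variant is matched directly, no vtable involved.
    enum PolicyWeightEnum {
        Plain,
        Weighted(u32),
    }

    impl PolicyWeightEnum {
        fn policy_weight(&self) -> u32 {
            match self {
                PolicyWeightEnum::Plain => 1,
                PolicyWeightEnum::Weighted(w) => *w,
            }
        }
    }

    fn main() {
        assert_eq!(weight_via_trait_object(&WeightedDyn(8)), 8);
        assert_eq!(PolicyWeightEnum::Weighted(8).policy_weight(), 8);
        assert_eq!(PolicyWeightEnum::Plain.policy_weight(), 1);
    }
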
--- src/sync.rs | 79 ++++++++------------ src/sync/base_cache.rs | 35 ++++----- src/sync/entry_info.rs | 162 ++++++++++++++++++++--------------------- 3 files changed, 125 insertions(+), 151 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index f37e92f5..854bbe5e 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -3,7 +3,7 @@ use crate::common::{deque::DeqNode, time::Instant}; use parking_lot::Mutex; -use std::{marker::PhantomData, ptr::NonNull, sync::Arc}; +use std::{ptr::NonNull, sync::Arc}; pub(crate) mod base_cache; mod builder; @@ -19,7 +19,7 @@ pub use builder::CacheBuilder; pub use cache::Cache; pub use segment::SegmentedCache; -use self::entry_info::{ArcEntryInfo, EntryInfo, EntryInfoFull, EntryInfoWo}; +use self::entry_info::EntryInfo; /// The type of the unique ID to identify a predicate used by /// [`Cache#invalidate_entries_if`][invalidate-if] method. @@ -68,14 +68,14 @@ impl Clone for KeyHash { pub(crate) struct KeyDate { key: Arc, - entry_info: ArcEntryInfo, + entry_info: EntryInfo, } impl KeyDate { - pub(crate) fn new(key: Arc, entry_info: &ArcEntryInfo) -> Self { + pub(crate) fn new(key: Arc, entry_info: &EntryInfo) -> Self { Self { key, - entry_info: Arc::clone(entry_info), + entry_info: entry_info.clone(), } } @@ -91,15 +91,15 @@ impl KeyDate { pub(crate) struct KeyHashDate { key: Arc, hash: u64, - entry_info: ArcEntryInfo, + entry_info: EntryInfo, } impl KeyHashDate { - pub(crate) fn new(kh: KeyHash, entry_info: &ArcEntryInfo) -> Self { + pub(crate) fn new(kh: KeyHash, entry_info: &EntryInfo) -> Self { Self { key: kh.key, hash: kh.hash, - entry_info: Arc::clone(entry_info), + entry_info: entry_info.clone(), } } @@ -107,7 +107,7 @@ impl KeyHashDate { &self.key } - pub(crate) fn entry_info(&self) -> &ArcEntryInfo { + pub(crate) fn entry_info(&self) -> &EntryInfo { &self.entry_info } } @@ -183,12 +183,12 @@ unsafe impl Send for DeqNodes {} pub(crate) struct ValueEntry { pub(crate) value: V, - info: ArcEntryInfo, + info: EntryInfo, nodes: Mutex>, } impl ValueEntry { - fn new(value: V, entry_info: ArcEntryInfo) -> Self { + fn new(value: V, entry_info: EntryInfo) -> Self { Self { value, info: entry_info, @@ -199,7 +199,7 @@ impl ValueEntry { } } - fn new_from(value: V, entry_info: ArcEntryInfo, other: &Self) -> Self { + fn new_from(value: V, entry_info: EntryInfo, other: &Self) -> Self { let nodes = { let other_nodes = other.nodes.lock(); DeqNodes { @@ -218,7 +218,7 @@ impl ValueEntry { } } - pub(crate) fn entry_info(&self) -> &ArcEntryInfo { + pub(crate) fn entry_info(&self) -> &EntryInfo { &self.info } @@ -288,56 +288,41 @@ impl AccessTime for Arc> { } } -pub(crate) trait ValueEntryBuilder { - fn build(&self, value: V, policy_weight: u32) -> ValueEntry; - - fn build_from( - &self, - value: V, - policy_weight: u32, - other: &ValueEntry, - ) -> ValueEntry; +#[derive(Clone, Copy, Debug)] +pub(crate) enum CacheFeatures { + Plain, + Weighted, } -pub(crate) struct ValueEntryBuilderImpl(PhantomData<(K, V, EI)>); - -impl ValueEntryBuilderImpl { - pub(crate) fn new() -> Self { - Self(PhantomData::default()) +impl CacheFeatures { + pub(crate) fn new(is_weighter_defined: bool) -> Self { + if is_weighter_defined { + Self::Weighted + } else { + Self::Plain + } } } -impl ValueEntryBuilder for ValueEntryBuilderImpl { - fn build(&self, value: V, policy_weight: u32) -> ValueEntry { - let info = Arc::new(EntryInfoFull::new(policy_weight)); - ValueEntry::new(value, info) - } +pub(crate) struct ValueEntryBuilder(CacheFeatures); - fn build_from( - &self, - value: V, - policy_weight: u32, - other: 
&ValueEntry, - ) -> ValueEntry { - let info = Arc::clone(&other.info); - info.set_policy_weight(policy_weight); - ValueEntry::new_from(value, info, other) +impl ValueEntryBuilder { + pub(crate) fn new(features: CacheFeatures) -> Self { + Self(features) } -} -impl ValueEntryBuilder for ValueEntryBuilderImpl { - fn build(&self, value: V, policy_weight: u32) -> ValueEntry { - let info = Arc::new(EntryInfoWo::new(policy_weight)); + pub(crate) fn build(&self, value: V, policy_weight: u32) -> ValueEntry { + let info = EntryInfo::new(self.0, policy_weight); ValueEntry::new(value, info) } - fn build_from( + pub(crate) fn build_from( &self, value: V, policy_weight: u32, other: &ValueEntry, ) -> ValueEntry { - let info = Arc::clone(&other.info); + let info = other.info.clone(); info.set_policy_weight(policy_weight); ValueEntry::new_from(value, info, other) } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 27873e47..7b1b4a4d 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -1,10 +1,9 @@ use super::{ deques::Deques, - entry_info::EntryInfoWo, housekeeper::{Housekeeper, InnerSync, SyncPace}, invalidator::{GetOrRemoveEntry, InvalidationResult, Invalidator, KeyDateLite, PredicateFun}, - AccessTime, EntryInfoFull, KeyDate, KeyHash, KeyHashDate, KvEntry, PredicateId, ReadOp, - ValueEntry, ValueEntryBuilder, ValueEntryBuilderImpl, Weigher, WriteOp, + AccessTime, CacheFeatures, KeyDate, KeyHash, KeyHashDate, KvEntry, PredicateId, ReadOp, + ValueEntry, ValueEntryBuilder, Weigher, WriteOp, }; use crate::{ common::{ @@ -237,7 +236,7 @@ where #[inline] pub(crate) fn do_insert_with_hash(&self, key: Arc, hash: u64, value: V) -> WriteOp { - let ws = self.inner.weigh(&key, &value); + let weight = self.inner.weigh(&key, &value); let op_cnt1 = Rc::new(AtomicU8::new(0)); let op_cnt2 = Rc::clone(&op_cnt1); let mut op1 = None; @@ -256,7 +255,7 @@ where // on_insert || { // let entry = Arc::new(ValueEntry::new(value.clone(), ws)); - let entry = self.new_value_entry(value.clone(), ws); + let entry = self.new_value_entry(value.clone(), weight); let cnt = op_cnt1.fetch_add(1, Ordering::Relaxed); op1 = Some(( cnt, @@ -264,19 +263,20 @@ where key_hash: KeyHash::new(Arc::clone(&key), hash), value_entry: Arc::clone(&entry), old_weight: 0, - new_weight: ws, + new_weight: weight, }, )); entry }, // on_modify |_k, old_entry| { - // NOTE: `new_with` sets the max value to the last_accessed and last_modified - // to prevent this updated ValueEntry from being evicted by an expiration policy. - // See the comments in `new_with` for more details. + // NOTES on `new_value_entry_from` method: + // 1. The internal EntryInfo will be shared between the old and new ValueEntries. + // 2. This method will set the last_accessed and last_modified to the max value to + // prevent this new ValueEntry from being evicted by an expiration policy. + // 3. This method will update the policy_weight with the new weight. 
let old_weight = old_entry.policy_weight(); - // let entry = Arc::new(ValueEntry::new_with(value.clone(), ws, old_entry)); - let entry = self.new_value_entry_from(value.clone(), ws, old_entry); + let entry = self.new_value_entry_from(value.clone(), weight, old_entry); let cnt = op_cnt2.fetch_add(1, Ordering::Relaxed); op2 = Some(( cnt, @@ -285,7 +285,7 @@ where key_hash: KeyHash::new(Arc::clone(&key), hash), value_entry: Arc::clone(&entry), old_weight, - new_weight: ws, + new_weight: weight, }, )); entry @@ -441,14 +441,14 @@ type CacheStore = moka_cht::SegmentedHashMap, Arc = (Arc, Arc>); -type BoxedValueEntryBuilder = Box + Send + Sync + 'static>; +// type BoxedValueEntryBuilder = Box + Send + Sync + 'static>; pub(crate) struct Inner { max_capacity: Option, weighted_size: AtomicCell, cache: CacheStore, build_hasher: S, - value_entry_builder: BoxedValueEntryBuilder, + value_entry_builder: ValueEntryBuilder, deques: Mutex>, frequency_sketch: RwLock, read_op_ch: Receiver>, @@ -494,11 +494,8 @@ where build_hasher.clone(), ); - let value_entry_builder: BoxedValueEntryBuilder = if weigher.is_some() { - Box::new(ValueEntryBuilderImpl::::new()) - } else { - Box::new(ValueEntryBuilderImpl::::new()) - }; + let features = CacheFeatures::new(weigher.is_some()); + let value_entry_builder = ValueEntryBuilder::new(features); // Ensure skt_capacity fits in a range of `128u32..=u32::MAX`. let skt_capacity = max_capacity diff --git a/src/sync/entry_info.rs b/src/sync/entry_info.rs index 2f6c8cd3..d20628c6 100644 --- a/src/sync/entry_info.rs +++ b/src/sync/entry_info.rs @@ -3,27 +3,29 @@ use std::sync::{ Arc, }; -use super::AccessTime; +use super::{AccessTime, CacheFeatures}; use crate::common::{atomic_time::AtomicInstant, time::Instant}; -pub(crate) trait EntryInfo: AccessTime { - fn is_admitted(&self) -> bool; - fn set_is_admitted(&self, value: bool); - fn reset_timestamps(&self); - fn policy_weight(&self) -> u32; - fn set_policy_weight(&self, size: u32); +pub(crate) enum EntryInfo { + Plain(Arc), + Weighted(Arc), } -pub(crate) type ArcEntryInfo = Arc; +#[derive(Default)] +pub(crate) struct Plain { + is_admitted: AtomicBool, + last_accessed: AtomicInstant, + last_modified: AtomicInstant, +} -pub(crate) struct EntryInfoFull { +pub(crate) struct Weighted { is_admitted: AtomicBool, last_accessed: AtomicInstant, last_modified: AtomicInstant, policy_weight: AtomicU32, } -impl EntryInfoFull { +impl Weighted { pub(crate) fn new(policy_weight: u32) -> Self { Self { is_admitted: Default::default(), @@ -34,116 +36,106 @@ impl EntryInfoFull { } } -impl EntryInfo for EntryInfoFull { - #[inline] - fn is_admitted(&self) -> bool { - self.is_admitted.load(Ordering::Acquire) - } - - #[inline] - fn set_is_admitted(&self, value: bool) { - self.is_admitted.store(value, Ordering::Release); - } - - #[inline] - fn reset_timestamps(&self) { - self.last_accessed.reset(); - self.last_modified.reset(); - } - - #[inline] - fn policy_weight(&self) -> u32 { - self.policy_weight.load(Ordering::Acquire) - } - - #[inline] - fn set_policy_weight(&self, size: u32) { - self.policy_weight.store(size, Ordering::Release); +impl Clone for EntryInfo { + fn clone(&self) -> Self { + match self { + Self::Plain(ei) => Self::Plain(Arc::clone(ei)), + Self::Weighted(ei) => Self::Weighted(Arc::clone(ei)), + } } } -impl AccessTime for EntryInfoFull { +impl EntryInfo { #[inline] - fn last_accessed(&self) -> Option { - self.last_accessed.instant() + pub(crate) fn new(features: CacheFeatures, policy_weight: u32) -> Self { + match features { + 
CacheFeatures::Plain => Self::Plain(Arc::new(Plain::default())), + CacheFeatures::Weighted => Self::Weighted(Arc::new(Weighted::new(policy_weight))), + } } #[inline] - fn set_last_accessed(&self, timestamp: Instant) { - self.last_accessed.set_instant(timestamp); + pub(crate) fn is_admitted(&self) -> bool { + let v = match self { + Self::Plain(ei) => &ei.is_admitted, + Self::Weighted(ei) => &ei.is_admitted, + }; + v.load(Ordering::Acquire) } #[inline] - fn last_modified(&self) -> Option { - self.last_modified.instant() + pub(crate) fn set_is_admitted(&self, value: bool) { + let v = match self { + Self::Plain(ei) => &ei.is_admitted, + Self::Weighted(ei) => &ei.is_admitted, + }; + v.store(value, Ordering::Release); } #[inline] - fn set_last_modified(&self, timestamp: Instant) { - self.last_modified.set_instant(timestamp); - } -} - -pub(crate) struct EntryInfoWo { - is_admitted: AtomicBool, - last_accessed: AtomicInstant, - last_modified: AtomicInstant, -} - -impl EntryInfoWo { - pub(crate) fn new(_policy_weight: u32) -> Self { - Self { - is_admitted: Default::default(), - last_accessed: Default::default(), - last_modified: Default::default(), + pub(crate) fn reset_timestamps(&self) { + match self { + Self::Plain(ei) => { + ei.last_accessed.reset(); + ei.last_modified.reset(); + } + Self::Weighted(ei) => { + ei.last_accessed.reset(); + ei.last_modified.reset(); + } } } -} -impl EntryInfo for EntryInfoWo { #[inline] - fn is_admitted(&self) -> bool { - self.is_admitted.load(Ordering::Acquire) - } - - #[inline] - fn set_is_admitted(&self, value: bool) { - self.is_admitted.store(value, Ordering::Release); - } - - #[inline] - fn reset_timestamps(&self) { - self.last_accessed.reset(); - self.last_modified.reset(); + pub(crate) fn policy_weight(&self) -> u32 { + match self { + Self::Plain(_) => 1, + Self::Weighted(ei) => ei.policy_weight.load(Ordering::Acquire), + } } - #[inline] - fn policy_weight(&self) -> u32 { - 1 + pub(crate) fn set_policy_weight(&self, size: u32) { + match self { + Self::Plain(_) => (), + Self::Weighted(ei) => ei.policy_weight.store(size, Ordering::Release), + } } - - #[inline] - fn set_policy_weight(&self, _size: u32) {} } -impl AccessTime for EntryInfoWo { +impl AccessTime for EntryInfo { #[inline] fn last_accessed(&self) -> Option { - self.last_accessed.instant() + let v = match self { + Self::Plain(ei) => &ei.last_accessed, + Self::Weighted(ei) => &ei.last_accessed, + }; + v.instant() } #[inline] fn set_last_accessed(&self, timestamp: Instant) { - self.last_accessed.set_instant(timestamp); + let v = match self { + Self::Plain(ei) => &ei.last_accessed, + Self::Weighted(ei) => &ei.last_accessed, + }; + v.set_instant(timestamp); } #[inline] fn last_modified(&self) -> Option { - self.last_modified.instant() + let v = match self { + Self::Plain(ei) => &ei.last_modified, + Self::Weighted(ei) => &ei.last_modified, + }; + v.instant() } #[inline] fn set_last_modified(&self, timestamp: Instant) { - self.last_modified.set_instant(timestamp); + let v = match self { + Self::Plain(ei) => &ei.last_modified, + Self::Weighted(ei) => &ei.last_modified, + }; + v.set_instant(timestamp); } } From 887c5bf5d94d308e0e5350d68246a5af2901f44f Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Thu, 30 Dec 2021 18:30:34 +0800 Subject: [PATCH 36/42] Size-aware cache management Finish implementing size-aware cache management on `unsync::Cache`.
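For illustration only (not part of this diff), a minimal sketch of how the size-aware `unsync::Cache` is expected to be used once this series lands. It assumes the builder API added in the later patches of this series (`Cache::builder()`, `weigher()`, `max_capacity()`, `build()`); names and bounds may differ in the released crate:

```rust
use std::convert::TryInto;
use moka::unsync::Cache;

fn main() {
    // Bound the cache by the summed byte length of the cached strings,
    // not by the number of entries.
    let mut cache = Cache::builder()
        .weigher(|_key: &String, value: &String| -> u32 {
            value.len().try_into().unwrap_or(u32::MAX)
        })
        // Keep at most ~1 MiB worth of values.
        .max_capacity(1024 * 1024)
        .build();

    cache.insert("key".to_string(), "a value".to_string());
    assert!(cache.get(&"key".to_string()).is_some());
}
```

The weigher returns a relative `u32` weight per entry; eviction keeps the total weight at or below `max_capacity` instead of counting entries.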
--- src/unsync.rs | 40 ++++++++++++++++++++++++++-------------- src/unsync/cache.rs | 35 +++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/src/unsync.rs b/src/unsync.rs index 82440131..e1052fa0 100644 --- a/src/unsync.rs +++ b/src/unsync.rs @@ -53,61 +53,73 @@ type KeyDeqNodeAo = NonNull>>; // DeqNode for the write order queue. type KeyDeqNodeWo = NonNull>>; -struct DeqNodes { +struct EntryInfo { access_order_q_node: Option>, write_order_q_node: Option>, + policy_weight: u32, } pub(crate) struct ValueEntry { pub(crate) value: V, - deq_nodes: DeqNodes, + info: EntryInfo, } impl ValueEntry { - pub(crate) fn new(value: V) -> Self { + pub(crate) fn new(value: V, policy_weight: u32) -> Self { Self { value, - deq_nodes: DeqNodes { + info: EntryInfo { access_order_q_node: None, write_order_q_node: None, + policy_weight, }, } } #[inline] pub(crate) fn replace_deq_nodes_with(&mut self, mut other: Self) { - self.deq_nodes.access_order_q_node = other.deq_nodes.access_order_q_node.take(); - self.deq_nodes.write_order_q_node = other.deq_nodes.write_order_q_node.take(); + self.info.access_order_q_node = other.info.access_order_q_node.take(); + self.info.write_order_q_node = other.info.write_order_q_node.take(); } #[inline] pub(crate) fn access_order_q_node(&self) -> Option> { - self.deq_nodes.access_order_q_node + self.info.access_order_q_node } #[inline] pub(crate) fn set_access_order_q_node(&mut self, node: Option>) { - self.deq_nodes.access_order_q_node = node; + self.info.access_order_q_node = node; } #[inline] pub(crate) fn take_access_order_q_node(&mut self) -> Option> { - self.deq_nodes.access_order_q_node.take() + self.info.access_order_q_node.take() } #[inline] pub(crate) fn write_order_q_node(&self) -> Option> { - self.deq_nodes.write_order_q_node + self.info.write_order_q_node } #[inline] pub(crate) fn set_write_order_q_node(&mut self, node: Option>) { - self.deq_nodes.write_order_q_node = node; + self.info.write_order_q_node = node; } #[inline] pub(crate) fn take_write_order_q_node(&mut self) -> Option> { - self.deq_nodes.write_order_q_node.take() + self.info.write_order_q_node.take() + } + + #[inline] + pub(crate) fn policy_weight(&self) -> u32 { + self.info.policy_weight + } + + #[inline] + pub(crate) fn set_policy_weight(&mut self, policy_weight: u32) { + self.info.policy_weight = policy_weight; } } @@ -120,7 +132,7 @@ impl AccessTime for ValueEntry { #[inline] fn set_last_accessed(&mut self, timestamp: Instant) { - if let Some(mut node) = self.deq_nodes.access_order_q_node { + if let Some(mut node) = self.info.access_order_q_node { unsafe { node.as_mut() }.set_last_accessed(timestamp); } } @@ -133,7 +145,7 @@ impl AccessTime for ValueEntry { #[inline] fn set_last_modified(&mut self, timestamp: Instant) { - if let Some(mut node) = self.deq_nodes.write_order_q_node { + if let Some(mut node) = self.info.write_order_q_node { unsafe { node.as_mut() }.set_last_modified(timestamp); } } diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index 745dddb2..dbe932aa 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -229,11 +229,10 @@ where self.evict_lru_entries(); let policy_weight = weigh(&mut self.weigher, &key, &value); let key = Rc::new(key); - let entry = ValueEntry::new(value); + let entry = ValueEntry::new(value, policy_weight); if let Some(old_entry) = self.cache.insert(Rc::clone(&key), entry) { - let old_policy_weight = weigh(&mut self.weigher, &key, &old_entry.value) as u64; - self.handle_update(key, timestamp, 
policy_weight, old_entry, old_policy_weight); + self.handle_update(key, timestamp, policy_weight, old_entry); } else { let hash = self.hash(&key); self.handle_insert(key, hash, policy_weight, timestamp); } } @@ -252,10 +251,11 @@ where self.evict_expired_if_needed(); self.evict_lru_entries(); - // TODO: Update the weighted_size. if let Some(mut entry) = self.cache.remove(key) { + let weight = entry.policy_weight(); self.deques.unlink_ao(&mut entry); - Deques::unlink_wo(&mut self.deques.write_order, &mut entry) + Deques::unlink_wo(&mut self.deques.write_order, &mut entry); + self.saturating_sub_from_total_weight(weight as u64); } } @@ -298,13 +298,17 @@ where .map(|(key, _)| Rc::clone(key)) .collect::>(); - // TODO: Update the weighted_size. + let mut invalidated = 0u64; + keys_to_invalidate.into_iter().for_each(|k| { if let Some(mut entry) = cache.remove(&k) { + let weight = entry.policy_weight(); deques.unlink_ao(&mut entry); Deques::unlink_wo(&mut deques.write_order, &mut entry); + invalidated = invalidated.saturating_add(weight as u64); } }); + self.saturating_sub_from_total_weight(invalidated); } /// Returns the `max_capacity` of this cache. @@ -576,21 +580,24 @@ where timestamp: Option, policy_weight: u32, old_entry: ValueEntry, - old_policy_weight: u64, ) { + let old_policy_weight = old_entry.policy_weight(); + let entry = self.cache.get_mut(&key).unwrap(); entry.replace_deq_nodes_with(old_entry); if let Some(ts) = timestamp { entry.set_last_accessed(ts); entry.set_last_modified(ts); } + entry.set_policy_weight(policy_weight); + let deqs = &mut self.deques; deqs.move_to_back_ao(entry); if self.time_to_live.is_some() { deqs.move_to_back_wo(entry); } - self.saturating_sub_from_total_weight(old_policy_weight); + self.saturating_sub_from_total_weight(old_policy_weight as u64); self.saturating_add_to_total_weight(policy_weight as u64); } @@ -602,13 +609,12 @@ where if self.time_to_idle.is_some() { let deqs = &mut self.deques; - let (window, probation, protected, wo, cache, weigher, time_to_idle) = ( + let (window, probation, protected, wo, cache, time_to_idle) = ( &mut deqs.window, &mut deqs.probation, &mut deqs.protected, &mut deqs.write_order, &mut self.cache, - &mut self.weigher, &self.time_to_idle, ); @@ -619,7 +625,6 @@ where wo, cache, time_to_idle, - weigher, EVICTION_BATCH_SIZE, now, ) @@ -635,7 +640,6 @@ where } } - #[allow(clippy::too_many_arguments)] #[inline] fn remove_expired_ao( deq_name: &str, @@ -643,7 +647,6 @@ where write_order_deq: &mut Deque>, cache: &mut CacheStore, time_to_idle: &Option, - weigher: &mut Option>, batch_size: usize, now: Instant, ) -> u64 { @@ -668,7 +671,7 @@ where let key = key.unwrap(); if let Some(mut entry) = cache.remove(&key) { - let weight = weigh(weigher, &key, &entry.value); + let weight = entry.policy_weight(); Deques::unlink_ao_from_deque(deq_name, deq, &mut entry); Deques::unlink_wo(write_order_deq, &mut entry); evicted = evicted.saturating_add(weight as u64); @@ -706,7 +709,7 @@ where let key = key.unwrap(); if let Some(mut entry) = self.cache.remove(&key) { - let weight = weigh(&mut self.weigher, &key, &entry.value); + let weight = entry.policy_weight(); self.deques.unlink_ao(&mut entry); Deques::unlink_wo(&mut self.deques.write_order, &mut entry); evicted = evicted.saturating_sub(weight as u64); } @@ -745,7 +748,7 @@ where let key = key.unwrap(); if let Some(mut entry) = cache.remove(&key) { - let weight = weigh(&mut self.weigher, &key, &entry.value); + let weight = entry.policy_weight(); Deques::unlink_ao_from_deque(DEQ_NAME, probation, &mut 
entry); Deques::unlink_wo(wo, &mut entry); evicted = evicted.saturating_add(weight as u64); From 9bca18385a33104ae3098231035d8f6ad44bbf06 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Thu, 30 Dec 2021 20:05:45 +0800 Subject: [PATCH 37/42] Size-aware cache management Update `sync::SegmentedCache` to support weigher function. --- src/future/builder.rs | 3 +- src/sync.rs | 2 +- src/sync/builder.rs | 9 +++-- src/sync/segment.rs | 85 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/src/future/builder.rs b/src/future/builder.rs index 56ac5529..8d63622d 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -5,6 +5,7 @@ use std::{ collections::hash_map::RandomState, hash::{BuildHasher, Hash}, marker::PhantomData, + sync::Arc, time::Duration, }; @@ -155,7 +156,7 @@ impl CacheBuilder { /// Sets the weigher closure of the cache. pub fn weigher(self, weigher: impl Fn(&K, &V) -> u32 + Send + Sync + 'static) -> Self { Self { - weigher: Some(Box::new(weigher)), + weigher: Some(Arc::new(weigher)), ..self } } diff --git a/src/sync.rs b/src/sync.rs index 854bbe5e..c71fc09c 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -37,7 +37,7 @@ pub trait ConcurrentCacheExt { fn sync(&self); } -pub(crate) type Weigher = Box u32 + Send + Sync + 'static>; +pub(crate) type Weigher = Arc u32 + Send + Sync + 'static>; pub(crate) trait AccessTime { fn last_accessed(&self) -> Option; diff --git a/src/sync/builder.rs b/src/sync/builder.rs index 2f42fb1f..5ce960a6 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -5,6 +5,7 @@ use std::{ collections::hash_map::RandomState, hash::{BuildHasher, Hash}, marker::PhantomData, + sync::Arc, time::Duration, }; @@ -88,12 +89,12 @@ where /// /// # Panics /// - /// Panics if `num_segments` is less than or equals to 1. + /// Panics if `num_segments` is zero. pub fn segments( self, num_segments: usize, ) -> CacheBuilder> { - assert!(num_segments > 1); + assert!(num_segments != 0); CacheBuilder { max_capacity: self.max_capacity, @@ -181,6 +182,7 @@ where self.initial_capacity, self.num_segments.unwrap(), build_hasher, + self.weigher, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -207,6 +209,7 @@ where self.initial_capacity, self.num_segments.unwrap(), hasher, + self.weigher, self.time_to_live, self.time_to_idle, self.invalidator_enabled, @@ -234,7 +237,7 @@ impl CacheBuilder { /// Sets the weigher closure of the cache. pub fn weigher(self, weigher: impl Fn(&K, &V) -> u32 + Send + Sync + 'static) -> Self { Self { - weigher: Some(Box::new(weigher)), + weigher: Some(Arc::new(weigher)), ..self } } diff --git a/src/sync/segment.rs b/src/sync/segment.rs index 1297b625..25e03f8e 100644 --- a/src/sync/segment.rs +++ b/src/sync/segment.rs @@ -1,4 +1,4 @@ -use super::{cache::Cache, ConcurrentCacheExt}; +use super::{cache::Cache, CacheBuilder, ConcurrentCacheExt, Weigher}; use crate::PredicateError; use std::{ @@ -79,9 +79,14 @@ where build_hasher, None, None, + None, false, ) } + + pub fn builder(num_segments: usize) -> CacheBuilder> { + CacheBuilder::default().segments(num_segments) + } } impl SegmentedCache @@ -93,11 +98,13 @@ where /// # Panics /// /// Panics if `num_segments` is 0. 
+ #[allow(clippy::too_many_arguments)] pub(crate) fn with_everything( max_capacity: Option, initial_capacity: Option, num_segments: usize, build_hasher: S, + weigher: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -108,6 +115,7 @@ where initial_capacity, num_segments, build_hasher, + weigher, time_to_live, time_to_idle, invalidator_enabled, @@ -360,11 +368,13 @@ where /// # Panics /// /// Panics if `num_segments` is 0. + #[allow(clippy::too_many_arguments)] fn new( max_capacity: Option, initial_capacity: Option, num_segments: usize, build_hasher: S, + weigher: Option>, time_to_live: Option, time_to_idle: Option, invalidator_enabled: bool, @@ -384,7 +394,7 @@ where seg_max_capacity, seg_init_capacity, build_hasher.clone(), - None, // TODO + weigher.as_ref().map(Arc::clone), time_to_live, time_to_idle, invalidator_enabled, @@ -479,6 +489,77 @@ mod tests { cache.invalidate(&"b"); } + #[test] + fn size_aware_eviction() { + let weigher = |_k: &&str, v: &(&str, u32)| v.1; + + let alice = ("alice", 10); + let bob = ("bob", 15); + let bill = ("bill", 20); + let cindy = ("cindy", 5); + let david = ("david", 15); + let dennis = ("dennis", 15); + + let mut cache = SegmentedCache::builder(1) + .max_capacity(31) + .weigher(weigher) + .build(); + cache.reconfigure_for_testing(); + + // Make the cache exterior immutable. + let cache = cache; + + cache.insert("a", alice); + cache.insert("b", bob); + assert_eq!(cache.get(&"a"), Some(alice)); + assert_eq!(cache.get(&"b"), Some(bob)); + cache.sync(); + // order (LRU -> MRU) and counts: a -> 1, b -> 1 + + cache.insert("c", cindy); + assert_eq!(cache.get(&"c"), Some(cindy)); + // order and counts: a -> 1, b -> 1, c -> 1 + cache.sync(); + + assert_eq!(cache.get(&"a"), Some(alice)); + assert_eq!(cache.get(&"b"), Some(bob)); + cache.sync(); + // order and counts: c -> 1, a -> 2, b -> 2 + + // To enter "d" (weight: 15), it needs to evict "c" (w: 5) and "a" (w: 10). + // "d" must have higher count than 3, which is the aggregated count + // of "a" and "c". + cache.insert("d", david); // count: d -> 0 + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 1 + + cache.insert("d", david); + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 2 + + cache.insert("d", david); + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 3 + + cache.insert("d", david); + cache.sync(); + assert_eq!(cache.get(&"d"), None); // d -> 4 + + // Finally "d" should be admitted by evicting "c" and "a". + cache.insert("d", dennis); + cache.sync(); + assert_eq!(cache.get(&"a"), None); + assert_eq!(cache.get(&"b"), Some(bob)); + assert_eq!(cache.get(&"c"), None); + assert_eq!(cache.get(&"d"), Some(dennis)); + + // Update "b" with "bill" (w: 20). This should evict "d" (w: 15). + cache.insert("b", bill); + cache.sync(); + assert_eq!(cache.get(&"b"), Some(bill)); + assert_eq!(cache.get(&"d"), None); + } + #[test] fn basic_multi_threads() { let num_threads = 4; From 554e6ac16b4ca9323c9b6b5e364f3ee9ef7060b1 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Fri, 31 Dec 2021 11:38:42 +0800 Subject: [PATCH 38/42] Size-aware cache management Update the docs. 
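As background for the doc changes below, a minimal sketch of configuring a weigher on `SegmentedCache`, using only the builder methods exercised by the new `size_aware_eviction` test (`SegmentedCache::builder(num_segments)`, `weigher()`, `max_capacity()`). Illustrative only, not part of the diff:

```rust
use std::convert::TryInto;
use moka::sync::SegmentedCache;

fn main() {
    // Four internal segments; eviction is driven by the summed weights
    // returned by the weigher rather than by the entry count.
    let cache = SegmentedCache::builder(4)
        .weigher(|_key, value: &String| -> u32 {
            value.len().try_into().unwrap_or(u32::MAX)
        })
        // Hold up to 32 MiB worth of values in total.
        .max_capacity(32 * 1024 * 1024)
        .build();

    cache.insert("greeting".to_string(), "hello, world".to_string());
    assert!(cache.get(&"greeting".to_string()).is_some());
}
```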
--- README.md | 49 +++++++++++++++++++++++++++++++++++-------- src/future/builder.rs | 3 +++ src/future/cache.rs | 33 ++++++++++++++++++----------- src/lib.rs | 7 +++---- src/sync/builder.rs | 3 +++ src/sync/cache.rs | 21 +++++++++++++------ src/sync/segment.rs | 4 ++++ src/unsync/builder.rs | 3 +++ src/unsync/cache.rs | 4 ++++ 9 files changed, 96 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 2662a8dc..d8759648 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ [![license][license-badge]](#license) [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fmoka-rs%2Fmoka.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2Fmoka-rs%2Fmoka?ref=badge_shield) -Moka is a fast, concurrent cache library for Rust. Moka is inspired by -[Caffeine][caffeine-git] (Java). +Moka is a fast, concurrent cache library for Rust. Moka is inspired by the +[Caffeine][caffeine-git] library for Java. Moka provides cache implementations on top of hash maps. They support full concurrency of retrievals and a high expected concurrency for updates. Moka also @@ -56,15 +56,15 @@ algorithm to determine which entries to evict when the capacity is exceeded. ## Moka in Production -Moka is powering production services as well as embedded devices like home routers. -Here are some highlights: +Moka is powering production services as well as embedded Linux devices like home +routers. Here are some highlights: - [crates.io](https://crates.io/): The official crate registry has been using Moka in its API service to reduce the loads on PostgreSQL. Moka is maintaining [cache hit rates of ~85%][gh-discussions-51] for the high-traffic download endpoint. (Moka used: Nov 2021 — present) - [aliyundrive-webdav][aliyundrive-webdav-git]: This WebDAV gateway for a cloud drive - may have been deployed in hundreds of home WiFi routers, including inexpensive + may have been deployed in hundreds of home Wi-Fi routers, including inexpensive models with 32-bit MIPS or ARMv5TE-based SoCs. Moka is used to cache the metadata of remote files. (Moka used: Aug 2021 — present) @@ -93,8 +93,8 @@ moka = { version = "0.6", features = ["future"] } The thread-safe, synchronous caches are defined in the `sync` module. -Cache entries are manually added using `insert` method, and are stored in the cache -until either evicted or manually invalidated. +Cache entries are manually added using `insert` or `get_or_insert_with` method, and +are stored in the cache until either evicted or manually invalidated. Here's an example of reading and updating a cache by using multiple threads: @@ -154,6 +154,12 @@ fn main() { } ``` +If you want to atomically initialize and insert a value when the key is not present, +you might want to check [the document][doc-sync-cache] for other insertion methods +`get_or_insert_with` and `get_or_try_insert_with`. + +[doc-sync-cache]: https://docs.rs/moka/*/moka/sync/struct.Cache.html#method.get_or_insert_with + ## Example: Asynchronous Cache @@ -241,6 +247,12 @@ async fn main() { } ``` +If you want to atomically initialize and insert a value when the key is not present, +you might want to check [the document][doc-future-cache] for other insertion methods +`get_or_insert_with` and `get_or_try_insert_with`. 
+ +[doc-future-cache]: https://docs.rs/moka/*/moka/future/struct.Cache.html#method.get_or_insert_with + ## Avoiding to clone the value at `get` @@ -283,7 +295,6 @@ use std::convert::TryInto; use moka::sync::Cache; fn main() { - // Evict based on the byte length of strings in the cache. let cache = Cache::builder() // A weigher closure takes &K and &V and returns a u32 representing the // relative size of the entry. Here, we use the byte length of the value @@ -298,6 +309,8 @@ fn main() { } ``` +Note that weighted sizes are not used when making eviction selections. + ## Example: Expiration Policies @@ -443,8 +456,18 @@ $ RUSTFLAGS='--cfg skeptic --cfg trybuild' cargo test \ ## Road Map - [x] `async` optimized caches. (`v0.2.0`) -- [ ] Weight based cache management ([#24](https://github.com/moka-rs/moka/pull/24)) +- [x] Bounding a cache with weighted size of entry. + (`v0.7.0` via [#24](https://github.com/moka-rs/moka/pull/24)) +- [ ] API stabilization. (Smaller core API, shorter names for frequently used + methods) + - e.g. + - `get(&Q)` → `get_if_present(&Q)` + - `get_or_insert_with(K, F)` → `get(K, F)` + - `get_or_try_insert_with(K, F)` → `try_get(K, F)` + - `blocking_insert(K, V)` → `blocking().insert(K, V)`. + - `time_to_live()` → `config().time_to_live()` - [ ] Cache statistics. (Hit rate, etc.) +- [ ] Notifications on eviction, etc. - [ ] Upgrade TinyLFU to Window TinyLFU. - [ ] The variable (per-entry) expiration, using a hierarchical timer wheel. @@ -454,6 +477,14 @@ $ RUSTFLAGS='--cfg skeptic --cfg trybuild' cargo test \ Moka is named after the [moka pot][moka-pot-wikipedia], a stove-top coffee maker that brews espresso-like coffee using boiling water pressurized by steam. +This name would imply the following facts and hopes: + +- Moka is a part of the Java Caffeine cache family. +- It is written in Rust. (Many moka pots are made of aluminum alloy or stainless + steel. We know they don't rust though) +- It should be fast. ("Espresso" in Italian means express) +- It should be easy to use, like a moka pot. + [moka-pot-wikipedia]: https://en.wikipedia.org/wiki/Moka_pot diff --git a/src/future/builder.rs b/src/future/builder.rs index 8d63622d..bbb8a519 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -154,6 +154,9 @@ impl CacheBuilder { } /// Sets the weigher closure of the cache. + /// + /// The closure should take `&K` and `&V` as the arguments and returns a `u32` + /// representing the relative size of the entry. pub fn weigher(self, weigher: impl Fn(&K, &V) -> u32 + Send + Sync + 'static) -> Self { Self { weigher: Some(Arc::new(weigher)), diff --git a/src/future/cache.rs b/src/future/cache.rs index b280ba5e..a5380434 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -42,8 +42,9 @@ use std::{ /// cache until either evicted or manually invalidated: /// /// - Inside an async context (`async fn` or `async` block), use -/// [`insert`](#method.insert) or [`invalidate`](#method.invalidate) method for -/// updating the cache and `await` them. +/// [`insert`](#method.insert), [`get_or_insert_with`](#method.get_or_insert_with) +/// or [`invalidate`](#method.invalidate) method for updating the cache and `await` +/// them. /// - Outside any async context, use [`blocking_insert`](#method.blocking_insert) or /// [`blocking_invalidate`](#method.blocking_invalidate) methods. They will block /// for a short time under heavy updates. 
@@ -116,6 +117,11 @@ use std::{ /// } /// ``` /// +/// If you want to atomically initialize and insert a value when the key is not +/// present, you might want to check other insertion methods +/// [`get_or_insert_with`](#method.get_or_insert_with) and +/// [`get_or_try_insert_with`](#method.get_or_try_insert_with). +/// /// # Avoiding to clone the value at `get` /// /// The return type of `get` method is `Option` instead of `Option<&V>`. Every @@ -161,14 +167,13 @@ use std::{ /// /// // Evict based on the byte length of strings in the cache. /// let cache = Cache::builder() -/// // Up to 32MiB instead of 3M entries because this cache is going to have -/// // a weigher. +/// // A weigher closure takes &K and &V and returns a u32 +/// // representing the relative size of the entry. +/// .weigher(|_key, value: &String| -> u32 { +/// value.len().try_into().unwrap_or(u32::MAX) +/// }) +/// // This cache will hold up to 32MiB of values. /// .max_capacity(32 * 1024 * 1024) -/// // A weigher closure takes &K and &V and returns a u32 representing the -/// // relative size of the entry. -/// .weigher(|_key, value: &String| -> u32 { -/// value.len().try_into().unwrap_or(u32::MAX) -/// }) /// .build(); /// cache.insert(2, "two".to_string()).await; /// } @@ -221,13 +226,13 @@ use std::{ /// .time_to_idle(Duration::from_secs( 5 * 60)) /// // Create the cache. /// .build(); -/// +/// /// // This entry will expire after 5 minutes (TTI) if there is no get(). /// cache.insert(0, "zero").await; -/// +/// /// // This get() will extend the entry life for another 5 minutes. /// cache.get(&0); -/// +/// /// // Even though we keep calling get(), the entry will expire /// // after 30 minutes (TTL) from the insert(). /// } @@ -323,6 +328,10 @@ where ) } + /// Returns a [`CacheBuilder`][builder-struct], which can builds a `Cache` with + /// various configuration knobs. + /// + /// [builder-struct]: ./struct.CacheBuilder.html pub fn builder() -> CacheBuilder> { CacheBuilder::default() } diff --git a/src/lib.rs b/src/lib.rs index 4c0e4e1e..f2ef571a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,13 +2,12 @@ #![warn(rust_2018_idioms)] //! Moka is a fast, concurrent cache library for Rust. Moka is inspired by -//! [Caffeine][caffeine-git] (Java). +//! the [Caffeine][caffeine-git] library for Java. //! //! Moka provides in-memory concurrent cache implementations on top of hash maps. //! They support full concurrency of retrievals and a high expected concurrency for -//! updates. -//! They utilize a lock-free concurrent hash table `SegmentedHashMap` from the -//! [moka-cht][moka-cht-crate] crate for the central key-value storage. +//! updates. They utilize a lock-free concurrent hash table `SegmentedHashMap` from +//! the [moka-cht][moka-cht-crate] crate for the central key-value storage. //! //! Moka also provides an in-memory, non-thread-safe cache implementation for single //! thread applications. diff --git a/src/sync/builder.rs b/src/sync/builder.rs index 5ce960a6..c708de19 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -235,6 +235,9 @@ impl CacheBuilder { } /// Sets the weigher closure of the cache. + /// + /// The closure should take `&K` and `&V` as the arguments and returns a `u32` + /// representing the relative size of the entry. 
pub fn weigher(self, weigher: impl Fn(&K, &V) -> u32 + Send + Sync + 'static) -> Self { Self { weigher: Some(Arc::new(weigher)), diff --git a/src/sync/cache.rs b/src/sync/cache.rs index 5f9a9a33..2f277de1 100644 --- a/src/sync/cache.rs +++ b/src/sync/cache.rs @@ -30,7 +30,8 @@ use std::{ /// /// # Examples /// -/// Cache entries are manually added using `insert` method, and are stored in the +/// Cache entries are manually added using [`insert`](#method.insert) or +/// [`get_or_insert_with`](#method.get_or_insert_with) method, and are stored in the /// cache until either evicted or manually invalidated. /// /// Here's an example of reading and updating a cache by using multiple threads: @@ -88,6 +89,11 @@ use std::{ /// } /// ``` /// +/// If you want to atomically initialize and insert a value when the key is not +/// present, you might want to check other insertion methods +/// [`get_or_insert_with`](#method.get_or_insert_with) and +/// [`get_or_try_insert_with`](#method.get_or_try_insert_with). +/// /// # Avoiding to clone the value at `get` /// /// The return type of `get` method is `Option` instead of `Option<&V>`. Every @@ -124,14 +130,13 @@ use std::{ /// /// // Evict based on the byte length of strings in the cache. /// let cache = Cache::builder() -/// // Up to 32MiB instead of 3M entries because this cache is going to have -/// // a weigher. -/// .max_capacity(32 * 1024 * 1024) -/// // A weigher closure takes &K and &V and returns a u32 representing the -/// // relative size of the entry. +/// // A weigher closure takes &K and &V and returns a u32 +/// // representing the relative size of the entry. /// .weigher(|_key, value: &String| -> u32 { /// value.len().try_into().unwrap_or(u32::MAX) /// }) +/// // This cache will hold up to 32MiB of values. +/// .max_capacity(32 * 1024 * 1024) /// .build(); /// cache.insert(2, "two".to_string()); /// ``` @@ -275,6 +280,10 @@ where ) } + /// Returns a [`CacheBuilder`][builder-struct], which can builds a `Cache` or + /// `SegmentedCache` with various configuration knobs. + /// + /// [builder-struct]: ./struct.CacheBuilder.html pub fn builder() -> CacheBuilder> { CacheBuilder::default() } diff --git a/src/sync/segment.rs b/src/sync/segment.rs index 25e03f8e..824837ba 100644 --- a/src/sync/segment.rs +++ b/src/sync/segment.rs @@ -84,6 +84,10 @@ where ) } + /// Returns a [`CacheBuilder`][builder-struct], which can builds a + /// `SegmentedCache` with various configuration knobs. + /// + /// [builder-struct]: ./struct.CacheBuilder.html pub fn builder(num_segments: usize) -> CacheBuilder> { CacheBuilder::default().segments(num_segments) } diff --git a/src/unsync/builder.rs b/src/unsync/builder.rs index c5cff345..d29c525f 100644 --- a/src/unsync/builder.rs +++ b/src/unsync/builder.rs @@ -137,6 +137,9 @@ impl CacheBuilder { } /// Sets the weigher closure of the cache. + /// + /// The closure should take `&K` and `&V` as the arguments and returns a `u32` + /// representing the relative size of the entry. pub fn weigher(self, weigher: impl FnMut(&K, &V) -> u32 + 'static) -> Self { Self { weigher: Some(Box::new(weigher)), diff --git a/src/unsync/cache.rs b/src/unsync/cache.rs index dbe932aa..d84b21df 100644 --- a/src/unsync/cache.rs +++ b/src/unsync/cache.rs @@ -138,6 +138,10 @@ where Self::with_everything(Some(max_capacity), None, build_hasher, None, None, None) } + /// Returns a [`CacheBuilder`][builder-struct], which can builds a `Cache` with + /// various configuration knobs. 
+ /// + /// [builder-struct]: ./struct.CacheBuilder.html pub fn builder() -> CacheBuilder> { CacheBuilder::default() } From 56f860e2845aab9b51f8f3d50fa8389271d6df03 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Fri, 31 Dec 2021 12:01:53 +0800 Subject: [PATCH 39/42] Update the CHANGELOG --- CHANGELOG.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8689e95d..854e407c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Moka — Change Log +## Version 0.7.0 + +### Added + +- Add support for weight-based (size aware) cache management. + ([#24][gh-pull-0024]) +- Add support for unbound cache. ([#24][gh-pull-0024]) + + ## Version 0.6.3 ### Fixed @@ -37,11 +46,6 @@ causes subsequent calls on the same key to get "unreachable code" panics. ([#43][gh-issue-0043]) -### Added - -- Add support for size aware admission policy. ([#24][gh-pull-0024]) -- Add support for unbound cache. ([#24][gh-pull-0024]) - ### Changed - Change `get_or_try_insert_with` to return a concrete error type rather From 97aec97657f66f5f66b1bf2dccbcc56d70ad747f Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Fri, 31 Dec 2021 12:03:59 +0800 Subject: [PATCH 40/42] Bump the version to v0.7.0 --- Cargo.toml | 2 +- README.md | 10 +++++----- src/future/builder.rs | 2 +- src/future/cache.rs | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 93db0104..f4435530 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "moka" -version = "0.6.3" +version = "0.7.0" authors = ["Tatsuya Kawano "] edition = "2018" diff --git a/README.md b/README.md index d8759648..21a2b324 100644 --- a/README.md +++ b/README.md @@ -78,14 +78,14 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -moka = "0.6" +moka = "0.7" ``` To use the asynchronous cache, enable a crate feature called "future". ```toml [dependencies] -moka = { version = "0.6", features = ["future"] } +moka = { version = "0.7", features = ["future"] } ``` @@ -187,7 +187,7 @@ Here is a similar program to the previous example, but using asynchronous cache // Cargo.toml // // [dependencies] -// moka = { version = "0.6", features = ["future"] } +// moka = { version = "0.7", features = ["future"] } // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } // futures = "0.3" @@ -426,9 +426,9 @@ to the dependency declaration. ```toml:Cargo.toml [dependencies] -moka = { version = "0.6", default-feautures = false } +moka = { version = "0.7", default-feautures = false } # Or -moka = { version = "0.6", default-feautures = false, features = ["future"] } +moka = { version = "0.7", default-feautures = false, features = ["future"] } ``` This will make Moka to switch to a fall-back implementation, so it will compile. 
diff --git a/src/future/builder.rs b/src/future/builder.rs index bbb8a519..e1b08c3b 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -19,7 +19,7 @@ use std::{ /// // Cargo.toml /// // /// // [dependencies] -/// // moka = { version = "0.6", features = ["future"] } +/// // moka = { version = "0.7", features = ["future"] } /// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } /// // futures = "0.3" /// diff --git a/src/future/cache.rs b/src/future/cache.rs index a5380434..6e352432 100644 --- a/src/future/cache.rs +++ b/src/future/cache.rs @@ -58,7 +58,7 @@ use std::{ /// // Cargo.toml /// // /// // [dependencies] -/// // moka = { version = "0.6", features = ["future"] } +/// // moka = { version = "0.7", features = ["future"] } /// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } /// // futures = "0.3" /// @@ -148,7 +148,7 @@ use std::{ /// // Cargo.toml /// // /// // [dependencies] -/// // moka = { version = "0.6", features = ["future"] } +/// // moka = { version = "0.7", features = ["future"] } /// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } /// // futures = "0.3" /// @@ -210,7 +210,7 @@ use std::{ /// // Cargo.toml /// // /// // [dependencies] -/// // moka = { version = "0.6", features = ["future"] } +/// // moka = { version = "0.7", features = ["future"] } /// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } /// // futures = "0.3" /// @@ -398,7 +398,7 @@ where /// // Cargo.toml /// // /// // [dependencies] - /// // moka = { version = "0.6", features = ["future"] } + /// // moka = { version = "0.7", features = ["future"] } /// // futures = "0.3" /// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } /// use moka::future::Cache; @@ -489,7 +489,7 @@ where /// // Cargo.toml /// // /// // [dependencies] - /// // moka = { version = "0.6", features = ["future"] } + /// // moka = { version = "0.7", features = ["future"] } /// // futures = "0.3" /// // reqwest = "0.11" /// // tokio = { version = "1", features = ["rt-multi-thread", "macros" ] } From 18a3e7105bb7ac9e1a8adfef158838a8684195f8 Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Fri, 31 Dec 2021 12:06:53 +0800 Subject: [PATCH 41/42] Update the copyright year --- LICENSE-APACHE | 2 +- LICENSE-MIT | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE-APACHE b/LICENSE-APACHE index 166c0fda..fad13c8d 100644 --- a/LICENSE-APACHE +++ b/LICENSE-APACHE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2020 - 2021 Tatsuya Kawano + Copyright 2020 - 2022 Tatsuya Kawano Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/LICENSE-MIT b/LICENSE-MIT index d9ec411c..aaf52b58 100644 --- a/LICENSE-MIT +++ b/LICENSE-MIT @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 - 2021 Tatsuya Kawano +Copyright (c) 2020 - 2022 Tatsuya Kawano Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From b77f50572502121146a2aea358e69537c16aee5d Mon Sep 17 00:00:00 2001 From: Tatsuya Kawano Date: Fri, 31 Dec 2021 12:58:47 +0800 Subject: [PATCH 42/42] Size-aware cache management Adding finishing touch. 
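One of the touches below adds a comment to `src/sync/entry_info.rs` explaining why `EntryInfo` uses enum-based dispatch instead of a trait object. The following stand-alone sketch (hypothetical types, not Moka's actual ones) illustrates the pattern that comment refers to:

```rust
use std::sync::Arc;

// Two entry kinds share one concrete enum, so a call is a `match` plus a
// direct method call instead of a vtable lookup through `Arc<dyn Trait>`.
struct Plain;
struct Weighted {
    weight: u32,
}

enum EntryInfo {
    Plain(Arc<Plain>),
    Weighted(Arc<Weighted>),
}

impl EntryInfo {
    fn policy_weight(&self) -> u32 {
        match self {
            EntryInfo::Plain(_) => 1,
            EntryInfo::Weighted(w) => w.weight,
        }
    }
}

fn main() {
    let entries = [
        EntryInfo::Plain(Arc::new(Plain)),
        EntryInfo::Weighted(Arc::new(Weighted { weight: 10 })),
    ];
    let total: u32 = entries.iter().map(EntryInfo::policy_weight).sum();
    assert_eq!(total, 11);
}
```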
--- src/future/builder.rs | 6 +++--- src/sync/base_cache.rs | 1 - src/sync/builder.rs | 6 +++--- src/sync/entry_info.rs | 4 ++++ src/unsync/builder.rs | 6 +++--- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/future/builder.rs b/src/future/builder.rs index e1b08c3b..47711da8 100644 --- a/src/future/builder.rs +++ b/src/future/builder.rs @@ -145,10 +145,10 @@ impl CacheBuilder { } } - /// Sets the initial capacity of the cache. - pub fn initial_capacity(self, capacity: usize) -> Self { + /// Sets the initial capacity (number of entries) of the cache. + pub fn initial_capacity(self, number_of_entries: usize) -> Self { Self { - initial_capacity: Some(capacity), + initial_capacity: Some(number_of_entries), ..self } } diff --git a/src/sync/base_cache.rs b/src/sync/base_cache.rs index 7b1b4a4d..990d54f5 100644 --- a/src/sync/base_cache.rs +++ b/src/sync/base_cache.rs @@ -254,7 +254,6 @@ where Arc::clone(&key), // on_insert || { - // let entry = Arc::new(ValueEntry::new(value.clone(), ws)); let entry = self.new_value_entry(value.clone(), weight); let cnt = op_cnt1.fetch_add(1, Ordering::Relaxed); op1 = Some(( diff --git a/src/sync/builder.rs b/src/sync/builder.rs index c708de19..081fcd45 100644 --- a/src/sync/builder.rs +++ b/src/sync/builder.rs @@ -226,10 +226,10 @@ impl CacheBuilder { } } - /// Sets the initial capacity of the cache. - pub fn initial_capacity(self, capacity: usize) -> Self { + /// Sets the initial capacity (number of entries) of the cache. + pub fn initial_capacity(self, number_of_entries: usize) -> Self { Self { - initial_capacity: Some(capacity), + initial_capacity: Some(number_of_entries), ..self } } diff --git a/src/sync/entry_info.rs b/src/sync/entry_info.rs index d20628c6..f10ba67c 100644 --- a/src/sync/entry_info.rs +++ b/src/sync/entry_info.rs @@ -6,6 +6,10 @@ use std::sync::{ use super::{AccessTime, CacheFeatures}; use crate::common::{atomic_time::AtomicInstant, time::Instant}; +// We use enum-based dynamic dispatch here, rather than using trait-object-based +// dynamic dispatch. Our benchmark programs showed enum-based dispatch was slightly +// (1% or 2%) faster than other in our use cases. + pub(crate) enum EntryInfo { Plain(Arc), Weighted(Arc), diff --git a/src/unsync/builder.rs b/src/unsync/builder.rs index d29c525f..f8b79eee 100644 --- a/src/unsync/builder.rs +++ b/src/unsync/builder.rs @@ -128,10 +128,10 @@ impl CacheBuilder { } } - /// Sets the initial capacity of the cache. - pub fn initial_capacity(self, capacity: usize) -> Self { + /// Sets the initial capacity (number of entries) of the cache. + pub fn initial_capacity(self, number_of_entries: usize) -> Self { Self { - initial_capacity: Some(capacity), + initial_capacity: Some(number_of_entries), ..self } }