-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add limit and shrink policy for recycler #15320
Conversation
1379454
to
b574493
Compare
perf/src/recycler.rs
Outdated
if now.saturating_sub(object_pool.last_shrink_check_ts) | ||
> object_pool.check_shrink_interval_ms as u64 | ||
{ | ||
object_pool.last_shrink_check_ts = now; | ||
let shrink_threshold_count = (object_pool.shrink_ratio | ||
* object_pool.total_allocated_count as f64) | ||
.ceil() as u32; | ||
|
||
// If more than the shrink threshold of all allocated objects are sitting doing nothing, | ||
// increment the `above_shrink_ratio_count`. | ||
if object_pool.len() > object_pool.minimum_object_count as usize | ||
&& object_pool.len() > shrink_threshold_count as usize | ||
{ | ||
object_pool.above_shrink_ratio_count += 1; | ||
} else { | ||
object_pool.above_shrink_ratio_count = 0; | ||
} | ||
|
||
let mut t = T::default(); | ||
t.set_recycler(Arc::downgrade(&self.recycler)); | ||
t | ||
if object_pool.above_shrink_ratio_count as usize | ||
>= object_pool.max_above_shrink_ratio_count as usize | ||
{ | ||
// Do the shrink | ||
let total_alocated_count_shrink_target = | ||
std::cmp::max(object_pool.minimum_object_count, shrink_threshold_count); | ||
let target_num_to_shrink = | ||
object_pool.total_allocated_count - total_alocated_count_shrink_target; | ||
for _ in 0..target_num_to_shrink { | ||
if let Some(mut expired_object) = object_pool.object_pool.pop() { | ||
expired_object.unset_recycler(); | ||
// May not be able to shrink exactly `target_num_to_shrink` objects since | ||
// in the case of new allocations, `total_allocated_count` is incremented | ||
// before the object is allocated (see `should_allocate_new` logic below). | ||
// This race allows a difference of up to the number of threads allocating | ||
// with this recycler. | ||
object_pool.total_allocated_count -= 1; | ||
} | ||
} | ||
|
||
object_pool.above_shrink_ratio_count = 0; | ||
datapoint_info!( | ||
"recycler_shrink", | ||
( | ||
"total_alocated_count_shrink_target", | ||
total_alocated_count_shrink_target as i64, | ||
i64 | ||
), | ||
("target_num_to_shrink", target_num_to_shrink as i64, i64), | ||
( | ||
"total_allocated_count", | ||
object_pool.total_allocated_count as i64, | ||
i64 | ||
), | ||
); | ||
} | ||
} | ||
|
||
let reused_object = object_pool.object_pool.pop(); | ||
if reused_object.is_some() { | ||
(reused_object, true, false) | ||
} else if let Some(limit) = object_pool.limit { | ||
let should_allocate_new = object_pool.total_allocated_count < limit; | ||
if should_allocate_new { | ||
object_pool.total_allocated_count += 1; | ||
} | ||
(None, false, should_allocate_new) | ||
} else { | ||
(None, false, true) | ||
} | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ryoqun this elastic shrinking seems to be working well for my test case where I spam the node, I can see the total number of objects rising then falling over time after the spam is over. The shrink itself is pretty fast (see below). Dropping the removed objects is the most expensive, but that's done outside of the lock.
[2021-02-15T05:54:13.916482353Z INFO solana_metrics::metrics]
datapoint: recycler_shrink target_size=16000i resulting_size=16000i ideal_num_to_remove=4000i recycler_shrink_elapsed=181i drop_elapsed=3145i
[2021-02-15T05:55:54.415188169Z INFO solana_metrics::metrics]
datapoint: recycler_shrink target_size=12800i resulting_size=12800i ideal_num_to_remove=3200i recycler_shrink_elapsed=217i drop_elapsed=2290i
[2021-02-15T05:57:34.937557165Z INFO solana_metrics::metrics]
datapoint: recycler_shrink target_size=10240i resulting_size=10240i ideal_num_to_remove=2560i recycler_shrink_elapsed=150i drop_elapsed=1758i
[2021-02-15T05:59:15.860579682Z INFO solana_metrics::metrics]
datapoint: recycler_shrink target_size=8192i resulting_size=8192i ideal_num_to_remove=2048i recycler_shrink_elapsed=161i drop_elapsed=1662i
[2021-02-15T06:00:56.903362743Z INFO solana_metrics::metrics]
datapoint: recycler_shrink target_size=6554i resulting_size=6554i ideal_num_to_remove=1638i recycler_shrink_elapsed=116i drop_elapsed=1473i
[2021-02-15T06:02:37.612972791Z INFO solana_metrics::metrics]
datapoint: recycler_shrink target_size=5244i resulting_size=5244i ideal_num_to_remove=1310i recycler_shrink_elapsed=112i drop_elapsed=1160i
[2021-02-15T06:04:18.454551804Z INFO solana_metrics::metrics]
datapoint: recycler_shrink target_size=4196i resulting_size=4196i ideal_num_to_remove=1048i recycler_shrink_elapsed=111i drop_elapsed=1112i
What do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
f3d3907
to
b32f340
Compare
8f747d4
to
f26d334
Compare
perf/src/recycler.rs
Outdated
} | ||
|
||
fn get_shrink_target(shrink_ratio: f64, current_size: u32) -> u32 { | ||
(shrink_ratio * current_size as f64).ceil() as u32 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need float for this.
(i'll review this today.) |
perf/src/recycler.rs
Outdated
if i >= limit { | ||
assert!(x.is_none()); | ||
} else { | ||
allocated_items.push(x.unwrap()); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe invert the then
clause and else
clause? I usually want to put something happen first in the then
clause.
Codecov Report
@@ Coverage Diff @@
## master #15320 +/- ##
=========================================
- Coverage 80.2% 79.6% -0.6%
=========================================
Files 406 404 -2
Lines 103910 103654 -256
=========================================
- Hits 83377 82572 -805
- Misses 20533 21082 +549 |
could you mix my perf test into this pr? https://github.com/solana-labs/solana/pull/15139/files#r578352709 |
@carllin Oh, are you wondering excessive allocation? I know gossip doesn't recycle. Here's my patch in the baking queue. Could you also incorporate this? |
Also, I think we should test ala #8175 |
perf/src/recycler.rs
Outdated
} | ||
|
||
impl<T: Default + Reset> Recycler<T> { | ||
pub fn new(shrink_metric_name: &'static str) -> Self { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nits: new_without_limit
? This looks like it's pretty dangerous to instantiate this if confronted to untrusted data flow. So indicate the name as so. :)
perf/src/recycler.rs
Outdated
.lock() | ||
.expect("recycler lock in pb fn allocate"); | ||
|
||
let now = timestamp(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's generally more suited to use Instant
just directly rather than timestamp()
(which is backed by SystemTime
). Because of monotonically non-decreasing nature of Instant
over SystemTime
.
perf/src/recycler.rs
Outdated
if now.saturating_sub(object_pool.last_shrink_check_ts) | ||
> object_pool.check_shrink_interval_ms as u64 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By using Instant, you can write like this: https://github.com/solana-labs/solana/pull/15139/files#r578474676
perf/src/recycler.rs
Outdated
// Drop these outside of the lock because the Drop() implementation for | ||
// certain objects like PinnedVec's can be expensive | ||
shrink_removed_objects.push(expired_object); | ||
// May not be able to shrink exactly `ideal_num_to_remove` objects since |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nits: since
.
perf/src/recycler.rs
Outdated
let reused_object = object_pool.object_pool.pop(); | ||
if reused_object.is_some() { | ||
(reused_object, true, false, shrink_stats) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nits: I don't think this is really simpler writing, arguably. However I can't resist to propose this:
if let Some(reused_object) = object_pool.object_pool.pop() {
(Some(reused_object), true, false, shrink_stats)
} ...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed in separate make_allocation_decision()
function!
perf/src/recycler.rs
Outdated
); | ||
pub fn allocate(&self) -> Option<T> { | ||
let mut shrink_removed_objects = vec![]; | ||
let (mut allocated_object, did_reuse, should_allocate_new, mut shrink_stats) = { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
rather long chunk of code.. how about extracting to fn
?
perf/src/recycler.rs
Outdated
} | ||
(None, false, should_allocate_new, shrink_stats) | ||
} else { | ||
(None, false, true, shrink_stats) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these option, bool, bool are hurting my brain a bit...
how about this?:
enum AllocationDecision {
Reuse(T),
Allocate,
DontAllocate,
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yup, was meaning to refactor this mess, thanks!
perf/src/recycler.rs
Outdated
|
||
#[derive(Debug)] | ||
pub struct ObjectPool<T: Reset> { | ||
object_pool: Vec<T>, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nits: object_pool.object_pool
in the other place of this code looks a bit uncool. How about renaming this to objects
?
btw, I don't like RecyclerX
as well, unrelated to your pr...
(cherry picked from commit c2e8814) # Conflicts: # Cargo.lock # perf/Cargo.toml
(cherry picked from commit c2e8814)
(cherry picked from commit c2e8814)
This reverts commit c2e8814.
This reverts commit c2e8814.
This reverts commit c2e8814.
This reverts commit c2e8814.
This reverts commit c2e8814.
This reverts commit c2e8814.
solana-labs#15320 added an allocation limit to the recycler, which has been the source of a number of bugs. For example the code below panics by simply cloning packets: const RECYCLER_LIMIT: usize = 8; let recycler = PacketsRecycler::new_with_limit("", RECYCLER_LIMIT as u32); let packets = Packets::new_with_recycler(recycler.clone(), 1).unwrap(); for _ in 0..RECYCLER_LIMIT { let _ = packets.clone(); } Packets::new_with_recycler(recycler.clone(), 1); The implementation also fails to account for instances where objects are consumed. Having the allocation limit in the recycler also seems out of place, as higher level code has better context to impose allocation limits (e.g. by using bounded channels to rate-limit), whereas the recycler would be simpler and more efficient if it just did the recycling. This commit: * Reverts solana-labs#15320 * Adds a shrink policy to the recycler without an allocation limit.
solana-labs#15320 added an allocation limit to the recycler, which has been the source of a number of bugs. For example the code bellow panics by simply cloning packets: const RECYCLER_LIMIT: usize = 8; let recycler = PacketsRecycler::new_with_limit("", RECYCLER_LIMIT as u32); let packets = Packets::new_with_recycler(recycler.clone(), 1).unwrap(); for _ in 0..RECYCLER_LIMIT { let _ = packets.clone(); } Packets::new_with_recycler(recycler.clone(), 1); The implementation also fails to account for instances where objects are consumed. Having the allocation limit in the recycler also seems out of place, as higher level code has better context to impose allocation limits (e.g. by using bounded channels to rate-limit), whereas the recycler would be simpler and more efficient if it just do the recycling. This commit: * Reverts solana-labs#15320 * Adds a shrink policy to the recycler without an allocation limit.
This reverts commit c2e8814.
solana-labs#15320 added an allocation limit to the recycler, which has been the source of a number of bugs. For example the code bellow panics by simply cloning packets: const RECYCLER_LIMIT: usize = 8; let recycler = PacketsRecycler::new_with_limit("", RECYCLER_LIMIT as u32); let packets = Packets::new_with_recycler(recycler.clone(), 1).unwrap(); for _ in 0..RECYCLER_LIMIT { let _ = packets.clone(); } Packets::new_with_recycler(recycler.clone(), 1); The implementation also fails to account for instances where objects are consumed. Having the allocation limit in the recycler also seems out of place, as higher level code has better context to impose allocation limits (e.g. by using bounded channels to rate-limit), whereas the recycler would be simpler and more efficient if it just do the recycling. This commit: * Reverts solana-labs#15320 * Adds a shrink policy to the recycler without an allocation limit.
This reverts commit c2e8814.
solana-labs#15320 added an allocation limit to the recycler, which has been the source of a number of bugs. For example the code bellow panics by simply cloning packets: const RECYCLER_LIMIT: usize = 8; let recycler = PacketsRecycler::new_with_limit("", RECYCLER_LIMIT as u32); let packets = Packets::new_with_recycler(recycler.clone(), 1).unwrap(); for _ in 0..RECYCLER_LIMIT { let _ = packets.clone(); } Packets::new_with_recycler(recycler.clone(), 1); The implementation also fails to account for instances where objects are consumed. Having the allocation limit in the recycler also seems out of place, as higher level code has better context to impose allocation limits (e.g. by using bounded channels to rate-limit), whereas the recycler would be simpler and more efficient if it just do the recycling. This commit: * Reverts solana-labs#15320 * Adds a shrink policy to the recycler without an allocation limit.
This reverts commit c2e8814.
solana-labs#15320 added an allocation limit to the recycler, which has been the source of a number of bugs. For example the code bellow panics by simply cloning packets: const RECYCLER_LIMIT: usize = 8; let recycler = PacketsRecycler::new_with_limit("", RECYCLER_LIMIT as u32); let packets = Packets::new_with_recycler(recycler.clone(), 1).unwrap(); for _ in 0..RECYCLER_LIMIT { let _ = packets.clone(); } Packets::new_with_recycler(recycler.clone(), 1); The implementation also fails to account for instances where objects are consumed. Having the allocation limit in the recycler also seems out of place, as higher level code has better context to impose allocation limits (e.g. by using bounded channels to rate-limit), whereas the recycler would be simpler and more efficient if it just do the recycling. This commit: * Reverts solana-labs#15320 * Adds a shrink policy to the recycler without an allocation limit.
This reverts commit c2e8814.
solana-labs#15320 added an allocation limit to the recycler, which has been the source of a number of bugs. For example the code bellow panics by simply cloning packets: const RECYCLER_LIMIT: usize = 8; let recycler = PacketsRecycler::new_with_limit("", RECYCLER_LIMIT as u32); let packets = Packets::new_with_recycler(recycler.clone(), 1).unwrap(); for _ in 0..RECYCLER_LIMIT { let _ = packets.clone(); } Packets::new_with_recycler(recycler.clone(), 1); The implementation also fails to account for instances where objects are consumed. Having the allocation limit in the recycler also seems out of place, as higher level code has better context to impose allocation limits (e.g. by using bounded channels to rate-limit), whereas the recycler would be simpler and more efficient if it just do the recycling. This commit: * Reverts solana-labs#15320 * Adds a shrink policy to the recycler without an allocation limit.
This reverts commit c2e8814.
#15320 added an allocation limit to the recycler, which has been the source of a number of bugs. For example the code below panics by simply cloning packets: const RECYCLER_LIMIT: usize = 8; let recycler = PacketsRecycler::new_with_limit("", RECYCLER_LIMIT as u32); let packets = Packets::new_with_recycler(recycler.clone(), 1).unwrap(); for _ in 0..RECYCLER_LIMIT { let _ = packets.clone(); } Packets::new_with_recycler(recycler.clone(), 1); The implementation also fails to account for instances where objects are consumed. Having the allocation limit in the recycler also seems out of place, as higher level code has better context to impose allocation limits (e.g. by using bounded channels to rate-limit), whereas the recycler would be simpler and more efficient if it just did the recycling. This commit: * Reverts #15320 * Adds a shrink policy to the recycler without an allocation limit.
Problem
Recycler can:
Summary of Changes
N times consecutively.
Fixes #
context: #14366 (blocker for another upcoming leak in gossip's PinnedVec usage).