Skip to content

Commit

Permalink
excludes epoch-slots from nodes with unknown or different shred versi…
Browse files Browse the repository at this point in the history
…on (#17899)

Inspecting TDS gossip table shows that crds values of nodes with
different shred-versions are creeping in. Their epoch-slots are
accumulated in ClusterSlots causing bogus slots very far from current
root which are not purged and so cause ClusterSlots keep consuming more
memory:
#17789
#14366 (comment)
#14366 (comment)

This commit updates ClusterInfo::get_epoch_slots, and discards entries
from nodes with unknown or different shred-version.

Follow up commits will patch gossip not to waste bandwidth and memory
over crds values of nodes with different shred-version.
  • Loading branch information
behzadnouri authored Jun 13, 2021
1 parent 361c1bd commit 985280e
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions gossip/src/cluster_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1170,10 +1170,19 @@ impl ClusterInfo {
.map(map)
}

/// Returns epoch-slots inserted since the given cursor.
/// Excludes entries from nodes with unkown or different shred version.
pub fn get_epoch_slots(&self, cursor: &mut Cursor) -> Vec<EpochSlots> {
let self_shred_version = self.my_shred_version();
let gossip = self.gossip.read().unwrap();
let entries = gossip.crds.get_epoch_slots(cursor);
entries
.filter(
|entry| match gossip.crds.get_contact_info(entry.value.pubkey()) {
Some(node) => node.shred_version == self_shred_version,
None => false,
},
)
.map(|entry| match &entry.value.data {
CrdsData::EpochSlots(_, slots) => slots.clone(),
_ => panic!("this should not happen!"),
Expand Down Expand Up @@ -3839,6 +3848,43 @@ mod tests {

let slots = cluster_info.get_epoch_slots(&mut cursor);
assert!(slots.is_empty());

// Test with different shred versions.
let mut rng = rand::thread_rng();
let node_pubkey = Pubkey::new_unique();
let mut node = ContactInfo::new_rand(&mut rng, Some(node_pubkey));
node.shred_version = 42;
let epoch_slots = EpochSlots::new_rand(&mut rng, Some(node_pubkey));
let entries = vec![
CrdsValue::new_unsigned(CrdsData::ContactInfo(node)),
CrdsValue::new_unsigned(CrdsData::EpochSlots(0, epoch_slots)),
];
{
let mut gossip = cluster_info.gossip.write().unwrap();
for entry in entries {
assert!(gossip.crds.insert(entry, /*now=*/ 0).is_ok());
}
}
// Should exclude other node's epoch-slot because of different
// shred-version.
let slots = cluster_info.get_epoch_slots(&mut Cursor::default());
assert_eq!(slots.len(), 1);
assert_eq!(slots[0].from, cluster_info.id);
// Match shred versions.
{
let mut node = cluster_info.my_contact_info.write().unwrap();
node.shred_version = 42;
}
cluster_info.push_self(
&HashMap::default(), // stakes
None, // gossip validators
);
cluster_info.flush_push_queue();
// Should now include both epoch slots.
let slots = cluster_info.get_epoch_slots(&mut Cursor::default());
assert_eq!(slots.len(), 2);
assert_eq!(slots[0].from, cluster_info.id);
assert_eq!(slots[1].from, node_pubkey);
}

#[test]
Expand Down

0 comments on commit 985280e

Please sign in to comment.