-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Let the coordinator thread open buckets (#782)
`Condvar::wait` is allowed to spuriously wake up at any time even without another thread notifying it. Currently, work buckets are opened and closed under the assumption that all GC workers have parked. Due to spurious wake-ups, any GC worker can wake up at any time, and break the assumption. This PR makes the following changes: 1. Only the coordinator can open more buckets (with the exception of `Prepare`). 2. When all workers have parked, they notify the coordinator that all workers have parked, and wait for the coordinator to open buckets. - Because of this, workers no longer report the "GC finished" event to the coordinator. Now it is the coordinator that determines whether GC has finished. 3. When all workers have parked, a boolean flag `WorkerMonitor::group_sleep` is set. No GC workers can unpark until this flag is explicitly cleared by the coordinator. - This flag makes the GC workers robust against spurious wake-ups. Even if any worker spuriously wakes up, it will find the `group_sleep` flag still set, and will wait again. - Concretely, the flag is cleared if the coordinator adds more work packets to open buckets, or opens new buckets. If no more work is available, GC finishes. To implement those changes, some data structures are modified. 1. The worker-to-coordinator channel is changed from `mpsc::Channel` to a custom `scheduler::controller::channel::Channel`. - It is not a strict FIFO channel. The coordinator always executes all pending coordinator work packets before handling the "all workers have parked" condition. 2. Introduced a `WorkerMonitor` struct as the anything-to-workers notification channel. - It encapsulates the existing `Mutex` and `Condvar` so that the coordinator or other workers can notify workers. - It includes the `parked_workers` counter. `WorkerGroup` is no longer responsible for recording parked workers. - It includes the `group_sleep` boolean flag. 3. Removed the `pending_coordinator_packets` counter. Fixes: #778
- Loading branch information
Showing
7 changed files
with
349 additions
and
270 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
use std::collections::VecDeque; | ||
|
||
use super::*; | ||
|
||
/// A one-way channel for workers to send coordinator packets and notifications to the controller. | ||
struct Channel<VM: VMBinding> { | ||
sync: Mutex<ChannelSync<VM>>, | ||
cond: Condvar, | ||
} | ||
|
||
/// The synchronized parts of `Channel`. | ||
struct ChannelSync<VM: VMBinding> { | ||
/// Pending coordinator work packets. | ||
coordinator_packets: VecDeque<Box<dyn CoordinatorWork<VM>>>, | ||
/// Whether all workers have parked. | ||
/// | ||
/// NOTE: This field is set to `true` by the last parked worker. | ||
/// It is used to notify the coordinator about the event that all workers have parked. | ||
/// To resume workers from "group sleeping", use `WorkerMonitor::notify_work_available`. | ||
all_workers_parked: bool, | ||
} | ||
|
||
/// Each worker holds an instance of this. | ||
/// | ||
/// It wraps a channel, and only allows workers to access it in expected ways. | ||
pub struct Sender<VM: VMBinding> { | ||
chan: Arc<Channel<VM>>, | ||
} | ||
|
||
impl<VM: VMBinding> Clone for Sender<VM> { | ||
fn clone(&self) -> Self { | ||
Self { | ||
chan: self.chan.clone(), | ||
} | ||
} | ||
} | ||
|
||
impl<VM: VMBinding> Sender<VM> { | ||
/// Send a coordinator work packet to the coordinator. | ||
pub fn add_coordinator_work(&self, work: Box<dyn CoordinatorWork<VM>>) { | ||
let mut sync = self.chan.sync.lock().unwrap(); | ||
sync.coordinator_packets.push_back(work); | ||
debug!("A worker has sent a coordinator work packet."); | ||
self.chan.cond.notify_one(); | ||
} | ||
|
||
/// Notify the coordinator that all workers have parked. | ||
pub fn notify_all_workers_parked(&self) { | ||
let mut sync = self.chan.sync.lock().unwrap(); | ||
sync.all_workers_parked = true; | ||
debug!("Notified the coordinator that all workers have parked."); | ||
self.chan.cond.notify_one(); | ||
} | ||
} | ||
|
||
/// The coordinator holds an instance of this. | ||
/// | ||
/// It wraps a channel, and only allows the coordinator to access it in expected ways. | ||
pub struct Receiver<VM: VMBinding> { | ||
chan: Arc<Channel<VM>>, | ||
} | ||
|
||
impl<VM: VMBinding> Receiver<VM> { | ||
/// Get an event. | ||
pub(super) fn poll_event(&self) -> Event<VM> { | ||
let mut sync = self.chan.sync.lock().unwrap(); | ||
loop { | ||
// Make sure the coordinator always sees packets before seeing "all parked". | ||
if let Some(work) = sync.coordinator_packets.pop_front() { | ||
debug!("Received a coordinator packet."); | ||
return Event::Work(work); | ||
} | ||
|
||
if sync.all_workers_parked { | ||
debug!("Observed all workers parked."); | ||
return Event::AllParked; | ||
} | ||
|
||
sync = self.chan.cond.wait(sync).unwrap(); | ||
} | ||
} | ||
|
||
/// Reset the "all workers have parked" flag. | ||
pub fn reset_all_workers_parked(&self) { | ||
let mut sync = self.chan.sync.lock().unwrap(); | ||
sync.all_workers_parked = false; | ||
debug!("The all_workers_parked state is reset."); | ||
} | ||
} | ||
|
||
/// This type represents the events the `Receiver` observes. | ||
pub(crate) enum Event<VM: VMBinding> { | ||
/// Send a work-packet to the coordinator thread. | ||
Work(Box<dyn CoordinatorWork<VM>>), | ||
/// Notify the coordinator thread that all GC tasks are finished. | ||
/// When sending this message, all the work buckets should be | ||
/// empty, and all the workers should be parked. | ||
AllParked, | ||
} | ||
|
||
/// Create a Sender-Receiver pair. | ||
pub(crate) fn make_channel<VM: VMBinding>() -> (Sender<VM>, Receiver<VM>) { | ||
let chan = Arc::new(Channel { | ||
sync: Mutex::new(ChannelSync { | ||
coordinator_packets: Default::default(), | ||
all_workers_parked: false, | ||
}), | ||
cond: Default::default(), | ||
}); | ||
|
||
let sender = Sender { chan: chan.clone() }; | ||
let receiver = Receiver { chan }; | ||
(sender, receiver) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.