Skip to content

Commit

Permalink
test: EXC-1820: Cover monitor and evict thread with tests (#3133)
Browse files Browse the repository at this point in the history
  • Loading branch information
berestovskyy authored Dec 13, 2024
1 parent b1f4339 commit 91dd6b3
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,8 @@ pub struct SandboxedExecutionController {
launcher_service: Box<dyn LauncherService>,
fd_factory: Arc<dyn PageAllocatorFileDescriptor>,
state_reader: Arc<dyn StateReader<State = ReplicatedState>>,
/// A channel to communicate with the `monitoring_and_evict` thread.
/// Send `true` to stop monitoring, `false` to trigger the monitoring.
stop_monitoring_thread: std::sync::mpsc::Sender<bool>,
}

Expand Down Expand Up @@ -1150,6 +1152,7 @@ impl SandboxedExecutionController {
embedder_config: &EmbeddersConfig,
fd_factory: Arc<dyn PageAllocatorFileDescriptor>,
state_reader: Arc<dyn StateReader<State = ReplicatedState>>,
spawn_monitor_thread: bool,
) -> std::io::Result<Self> {
let launcher_exec_argv =
create_launcher_argv(embedder_config).expect("No sandbox_launcher binary found");
Expand All @@ -1168,17 +1171,19 @@ impl SandboxedExecutionController {
let logger_copy = logger.clone();
let (tx, rx) = std::sync::mpsc::channel();

std::thread::spawn(move || {
SandboxedExecutionController::monitor_and_evict_sandbox_processes(
logger_copy,
backends_copy,
metrics_copy,
max_sandbox_count,
max_sandbox_idle_time,
rx,
state_reader_copy,
);
});
if spawn_monitor_thread {
std::thread::spawn(move || {
SandboxedExecutionController::monitor_and_evict_sandbox_processes(
logger_copy,
backends_copy,
metrics_copy,
max_sandbox_count,
max_sandbox_idle_time,
rx,
state_reader_copy,
);
});
}

let exit_watcher = Arc::new(ExitWatcher {
logger: logger.clone(),
Expand Down Expand Up @@ -2084,6 +2089,7 @@ mod tests {

fn sandboxed_execution_controller_dir_and_path(
max_sandbox_count: usize,
spawn_monitor_thread: bool,
) -> (SandboxedExecutionController, TempDir, PathBuf) {
let tempdir = tempfile::tempdir().unwrap();
let log_path = tempdir.path().join("log");
Expand All @@ -2106,18 +2112,16 @@ mod tests {
},
Arc::new(TestPageAllocatorFileDescriptorImpl::new()),
Arc::new(FakeStateManager::new()),
spawn_monitor_thread,
)
.unwrap();
// Stop the background monitoring thread to avoid race conditions with the tests.
while controller.stop_monitoring_thread.send(true).is_ok() {
thread::sleep(Duration::from_millis(10));
}
(controller, tempdir, log_path)
}

#[test]
fn sandbox_history_logged_on_sandbox_crash() {
let (controller, _dir, log_path) = sandboxed_execution_controller_dir_and_path(usize::MAX);
let (controller, _dir, log_path) =
sandboxed_execution_controller_dir_and_path(usize::MAX, false);

let wat = "(module)";
let canister_module = CanisterModule::new(wat::parse_str(wat).unwrap());
Expand Down Expand Up @@ -2227,7 +2231,8 @@ mod tests {
let active = SANDBOX_PROCESSES_TO_EVICT * 2;
let evicted = 3;
let empty = 2;
let (mut controller, _dir, _path) = sandboxed_execution_controller_dir_and_path(active);
let (mut controller, _dir, _path) =
sandboxed_execution_controller_dir_and_path(active, false);

add_controller_backends(&mut controller, 0, active, evicted, empty);
let partitioned_backends = get_active_evicted_empty_backends(&controller);
Expand Down Expand Up @@ -2269,7 +2274,8 @@ mod tests {
let active = SANDBOX_PROCESSES_TO_EVICT * 2;
let evicted = 3;
let empty = 2;
let (mut controller, _dir, _path) = sandboxed_execution_controller_dir_and_path(active);
let (mut controller, _dir, _path) =
sandboxed_execution_controller_dir_and_path(active, false);

add_controller_backends(&mut controller, 0, active, evicted, empty);
let partitioned_backends = get_active_evicted_empty_backends(&controller);
Expand Down Expand Up @@ -2316,7 +2322,8 @@ mod tests {
let active = SANDBOX_PROCESSES_TO_EVICT * 2;
let evicted = 3;
let empty = 2;
let (mut controller, _dir, _path) = sandboxed_execution_controller_dir_and_path(active);
let (mut controller, _dir, _path) =
sandboxed_execution_controller_dir_and_path(active, false);

add_controller_backends(&mut controller, 0, active, evicted, empty);
let partitioned_backends = get_active_evicted_empty_backends(&controller);
Expand Down Expand Up @@ -2358,4 +2365,112 @@ mod tests {
);
assert_eq!(0, partitioned_backends.2.len());
}

#[test]
fn monitor_and_evict_thread_is_spawned() {
let active = 1;
let spawn_monitor_thread = true;
let (controller, _dir, _path) =
sandboxed_execution_controller_dir_and_path(active, spawn_monitor_thread);
assert!(controller.stop_monitoring_thread.send(true).is_ok());

let spawn_monitor_thread = false;
let (controller, _dir, _path) =
sandboxed_execution_controller_dir_and_path(active, spawn_monitor_thread);
assert!(controller.stop_monitoring_thread.send(true).is_err());
}

#[test]
#[cfg(not(all(target_arch = "aarch64", target_vendor = "apple")))]
fn monitor_and_evict_thread_collects_rss() {
let active = 1;
let spawn_monitor_thread = false;
let (mut controller, _dir, _path) =
sandboxed_execution_controller_dir_and_path(active, spawn_monitor_thread);
add_controller_backends(&mut controller, 0, active, 0, 0);
let stats = get_sandbox_process_stats(&controller.backends);
assert_eq!(stats.len(), active);
assert_ne!(stats[0].1.pid, 0);
assert!(stats[0].2.last_used <= Instant::now());
assert!(stats[0].2.last_used >= Instant::now() - Duration::from_secs(1_000));
assert_eq!(stats[0].2.rss, DEFAULT_SANDBOX_PROCESS_RSS);

let spawn_monitor_thread = true;
let (mut controller, _dir, _path) =
sandboxed_execution_controller_dir_and_path(active, spawn_monitor_thread);
add_controller_backends(&mut controller, 0, active, 0, 0);

// Trigger the monitoring and wait for the monitoring results.
let _ = controller.stop_monitoring_thread.send(false);
for _ in 0..10_000 {
let stats = get_sandbox_process_stats(&controller.backends);
if stats[0].2.rss != DEFAULT_SANDBOX_PROCESS_RSS {
break;
}
thread::sleep(Duration::from_millis(10));
}
let stats = get_sandbox_process_stats(&controller.backends);
assert!(stats[0].2.rss < DEFAULT_SANDBOX_PROCESS_RSS);
}

#[test]
#[cfg(not(all(target_arch = "aarch64", target_vendor = "apple")))]
fn monitor_and_evict_thread_collects_metrics() {
let active = 1;
let evicted = 1;
let spawn_monitor_thread = true;
let (mut controller, _dir, _path) =
sandboxed_execution_controller_dir_and_path(active + evicted, spawn_monitor_thread);
let m = &controller.metrics;
let metric = &m.sandboxed_execution_subprocess_anon_rss;
assert_eq!(metric.get_sample_count(), 0);
assert_eq!(metric.get_sample_sum(), 0.0);
let metric = &m.sandboxed_execution_subprocess_memfd_rss;
assert_eq!(metric.get_sample_count(), 0);
assert_eq!(metric.get_sample_sum(), 0.0);
assert_eq!(m.sandboxed_execution_subprocess_rss.get_sample_count(), 0);
assert_eq!(m.sandboxed_execution_subprocess_rss.get_sample_sum(), 0.0);
let metric = &m.sandboxed_execution_subprocess_active_last_used;
assert_eq!(metric.get_sample_count(), 0);
assert_eq!(metric.get_sample_sum(), 0.0);
let metric = &m.sandboxed_execution_subprocess_evicted_last_used;
assert_eq!(metric.get_sample_count(), 0);
assert_eq!(metric.get_sample_sum(), 0.0);
let metric = &m.sandboxed_execution_subprocess_anon_rss_total;
assert_eq!(metric.get(), 0);
let metric = &m.sandboxed_execution_subprocess_memfd_rss_total;
assert_eq!(metric.get(), 0);

add_controller_backends(&mut controller, 0, active, evicted, 0);

// Trigger the monitoring twice and wait for the monitoring results.
let _ = controller.stop_monitoring_thread.send(false);
let _ = controller.stop_monitoring_thread.send(false);
for _ in 0..10_000 {
let m = &controller.metrics;
if m.sandboxed_execution_subprocess_anon_rss.get_sample_count() > 1 {
break;
}
thread::sleep(Duration::from_millis(10));
}
let m = &controller.metrics;
let metric = &m.sandboxed_execution_subprocess_anon_rss;
assert_ne!(metric.get_sample_count(), 0);
assert_ne!(metric.get_sample_sum(), 0.0);
let metric = &m.sandboxed_execution_subprocess_memfd_rss;
assert_ne!(metric.get_sample_count(), 0);
assert_eq!(metric.get_sample_sum(), 0.0); // no memfd.
assert_ne!(m.sandboxed_execution_subprocess_rss.get_sample_count(), 0);
assert_ne!(m.sandboxed_execution_subprocess_rss.get_sample_sum(), 0.0);
let metric = &m.sandboxed_execution_subprocess_active_last_used;
assert_ne!(metric.get_sample_count(), 0);
assert_ne!(metric.get_sample_sum(), 0.0);
let metric = &m.sandboxed_execution_subprocess_evicted_last_used;
assert_eq!(metric.get_sample_count(), 0); // no eviction.
assert_eq!(metric.get_sample_sum(), 0.0);
let metric = &m.sandboxed_execution_subprocess_anon_rss_total;
assert_ne!(metric.get(), 0);
let metric = &m.sandboxed_execution_subprocess_memfd_rss_total;
assert_eq!(metric.get(), 0); // no memfd.
}
}
1 change: 1 addition & 0 deletions rs/execution_environment/src/hypervisor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ impl Hypervisor {
&embedder_config,
Arc::clone(&fd_factory),
Arc::clone(&state_reader),
true,
)
.expect("Failed to start sandboxed execution controller");
Arc::new(executor)
Expand Down

0 comments on commit 91dd6b3

Please sign in to comment.