diff --git a/Cargo.lock b/Cargo.lock index 22d20ec40f..0d20b0b9ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,17 @@ dependencies = [ "cortex-a", ] +[[package]] +name = "ahash" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57e6e951cfbb2db8de1828d49073a113a29fd7117b1596caa781a258c7e38d72" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", +] + [[package]] name = "anyhow" version = "1.0.61" @@ -127,6 +138,12 @@ dependencies = [ "scroll", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hermit-entry" version = "0.9.1" @@ -168,11 +185,13 @@ name = "libhermit-rs" version = "0.5.0" dependencies = [ "aarch64", + "ahash", "async-task", "bitflags", "crossbeam-utils", "float-cmp", "futures-lite", + "hashbrown", "hermit-entry", "include-transformed", "log", @@ -583,6 +602,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 2dbc15488c..e6052cb317 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,8 +67,10 @@ dhcpv4 = [ ] [dependencies] +ahash = { version = "0.8", default-features = false } bitflags = "1.3" crossbeam-utils = { version = "0.8", default-features = false } +hashbrown = { version = "0.12", default-features = false } hermit-entry = { version = "0.9", features = ["kernel"] } include-transformed = { version = "0.2", optional = true } log = { version = "0.4", default-features = false } diff --git a/src/lib.rs b/src/lib.rs index 24108ff45c..db9852bc46 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ #![allow(incomplete_features)] #![feature(abi_x86_interrupt)] #![feature(allocator_api)] +#![feature(atomic_mut_ptr)] #![feature(asm_const)] #![feature(asm_sym)] #![feature(const_btree_new)] @@ -23,6 +24,8 @@ #![feature(alloc_error_handler)] #![feature(vec_into_raw_parts)] #![feature(drain_filter)] +#![feature(strict_provenance)] +#![feature(is_some_with)] #![no_std] #![cfg_attr(target_os = "none", feature(custom_test_frameworks))] #![cfg_attr(target_os = "none", cfg_attr(test, test_runner(crate::test_runner)))] diff --git a/src/scheduler/task.rs b/src/scheduler/task.rs index 069fb23221..81c6c3ae67 100644 --- a/src/scheduler/task.rs +++ b/src/scheduler/task.rs @@ -140,6 +140,7 @@ impl PartialEq for TaskHandle { impl Eq for TaskHandle {} /// Realize a priority queue for task handles +#[derive(Default)] pub struct TaskHandlePriorityQueue { queues: [Option>; NO_PRIORITIES], prio_bitmap: u64, @@ -158,6 +159,11 @@ impl TaskHandlePriorityQueue { } } + /// Checks if the queue is empty. + pub fn is_empty(&self) -> bool { + self.prio_bitmap == 0 + } + /// Add a task handle by its priority to the queue pub fn push(&mut self, task: TaskHandle) { let i = task.priority.into() as usize; @@ -196,16 +202,19 @@ impl TaskHandlePriorityQueue { None } - /// Remove a specific task handle from the priority queue. - pub fn remove(&mut self, task: TaskHandle) { + /// Remove a specific task handle from the priority queue. Returns `true` if + /// the handle was in the queue. + pub fn remove(&mut self, task: TaskHandle) -> bool { let queue_index = task.priority.into() as usize; //assert!(queue_index < NO_PRIORITIES, "Priority {} is too high", queue_index); + let mut success = false; if let Some(queue) = &mut self.queues[queue_index] { let mut i = 0; while i != queue.len() { if queue[i].id == task.id { queue.remove(i); + success = true; } else { i += 1; } @@ -215,6 +224,8 @@ impl TaskHandlePriorityQueue { self.prio_bitmap &= !(1 << queue_index as u64); } } + + success } } diff --git a/src/synch/futex.rs b/src/synch/futex.rs new file mode 100644 index 0000000000..7aae64cd8c --- /dev/null +++ b/src/synch/futex.rs @@ -0,0 +1,107 @@ +use ahash::RandomState; +use core::sync::atomic::{AtomicU32, Ordering::SeqCst}; +use hashbrown::{hash_map::Entry, HashMap}; + +use crate::{ + arch::kernel::{percore::core_scheduler, processor::get_timer_ticks}, + errno::{EAGAIN, EINVAL, ETIMEDOUT}, + scheduler::task::TaskHandlePriorityQueue, +}; + +use super::spinlock::SpinlockIrqSave; + +// TODO: Replace with a concurrent hashmap. +static PARKING_LOT: SpinlockIrqSave> = + SpinlockIrqSave::new(HashMap::with_hasher(RandomState::with_seeds(0, 0, 0, 0))); + +bitflags! { + pub struct Flags: u32 { + /// Use a relative timeout + const RELATIVE = 0b01; + } +} + +/// If the value at address matches the expected value, park the current thread until it is either +/// woken up with `futex_wake` (returns 0) or the specified timeout elapses (returns -ETIMEDOUT). +/// +/// The timeout is given in microseconds. If [`Flags::RELATIVE`] is given, it is interpreted as +/// relative to the current time. Otherwise it is understood to be an absolute time +/// (see `get_timer_ticks`). +pub fn futex_wait(address: &AtomicU32, expected: u32, timeout: Option, flags: Flags) -> i32 { + let mut parking_lot = PARKING_LOT.lock(); + // Check the futex value after locking the parking lot so that all changes are observed. + if address.load(SeqCst) != expected { + return -EAGAIN; + } + + let wakeup_time = if flags.contains(Flags::RELATIVE) { + timeout.and_then(|t| get_timer_ticks().checked_add(t)) + } else { + timeout + }; + + let scheduler = core_scheduler(); + scheduler.block_current_task(wakeup_time); + let handle = scheduler.get_current_task_handle(); + parking_lot + .entry(address.as_mut_ptr().addr()) + .or_default() + .push(handle); + drop(parking_lot); + + loop { + scheduler.reschedule(); + + // Try to remove ourselves from the waiting queue. + let mut parking_lot = PARKING_LOT.lock(); + let mut wakeup = true; + if let Entry::Occupied(mut queue) = parking_lot.entry(address.as_mut_ptr().addr()) { + // If we are not in the waking queue, this must have been a wakeup. + wakeup = !queue.get_mut().remove(handle); + if queue.get().is_empty() { + queue.remove(); + } + }; + + if wakeup { + return 0; + } else if wakeup_time.is_some_and(|&t| t <= get_timer_ticks()) { + // If the current time is past the wakeup time, the operation timed out. + return -ETIMEDOUT; + } + + // A spurious wakeup occurred, sleep again. + scheduler.block_current_task(wakeup_time); + } +} + +/// Wake `count` threads waiting on the futex at address. Returns the number of threads +/// woken up (saturates to `i32::MAX`). If `count` is `i32::MAX`, wake up all matching +/// waiting threads. If `count` is negative, returns -EINVAL. +pub fn futex_wake(address: &AtomicU32, count: i32) -> i32 { + if count < 0 { + return -EINVAL; + } + + let mut parking_lot = PARKING_LOT.lock(); + let mut queue = match parking_lot.entry(address.as_mut_ptr().addr()) { + Entry::Occupied(entry) => entry, + Entry::Vacant(_) => return 0, + }; + + let scheduler = core_scheduler(); + let mut woken = 0; + while woken != count || count == i32::MAX { + match queue.get_mut().pop() { + Some(handle) => scheduler.custom_wakeup(handle), + None => break, + } + woken = woken.saturating_add(1); + } + + if queue.get().is_empty() { + queue.remove(); + } + + woken +} diff --git a/src/synch/mod.rs b/src/synch/mod.rs index 6133381d61..05c4d4db8d 100644 --- a/src/synch/mod.rs +++ b/src/synch/mod.rs @@ -1,5 +1,6 @@ //! Synchronization primitives +pub mod futex; pub mod recmutex; pub mod semaphore; pub mod spinlock; diff --git a/src/syscalls/futex.rs b/src/syscalls/futex.rs new file mode 100644 index 0000000000..ffb5f5b81f --- /dev/null +++ b/src/syscalls/futex.rs @@ -0,0 +1,67 @@ +use core::sync::atomic::AtomicU32; + +use crate::{ + errno::EINVAL, + synch::futex::{self as synch, Flags}, + timespec, timespec_to_microseconds, +}; + +/// Like `synch::futex_wait`, but does extra sanity checks and takes a `timespec`. +/// +/// Returns -EINVAL if +/// * `address` is null +/// * `timeout` is negative +/// * `flags` contains unknown flags +extern "C" fn __sys_futex_wait( + address: *mut u32, + expected: u32, + timeout: *const timespec, + flags: u32, +) -> i32 { + if address.is_null() { + return -EINVAL; + } + + let address = unsafe { &*(address as *const AtomicU32) }; + let timeout = if timeout.is_null() { + None + } else { + match timespec_to_microseconds(unsafe { timeout.read() }) { + t @ Some(_) => t, + None => return -EINVAL, + } + }; + let flags = match Flags::from_bits(flags) { + Some(flags) => flags, + None => return -EINVAL, + }; + + synch::futex_wait(address, expected, timeout, flags) +} + +#[no_mangle] +pub extern "C" fn sys_futex_wait( + address: *mut u32, + expected: u32, + timeout: *const timespec, + flags: u32, +) -> i32 { + kernel_function!(__sys_futex_wait(address, expected, timeout, flags)) +} + +/// Like `synch::futex_wake`, but does extra sanity checks. +/// +/// Returns -EINVAL if `address` is null. +extern "C" fn __sys_futex_wake(address: *mut u32, count: i32) -> i32 { + if address.is_null() { + return -EINVAL; + } + + let address = unsafe { &*(address as *const AtomicU32) }; + synch::futex_wake(address, count) +} + +#[no_mangle] +pub extern "C" fn sys_futex_wake(address: *mut u32, count: i32) -> i32 { + kernel_function!(__sys_futex_wake(address, count)) +} diff --git a/src/syscalls/mod.rs b/src/syscalls/mod.rs index 21a6873cbc..d631357de4 100644 --- a/src/syscalls/mod.rs +++ b/src/syscalls/mod.rs @@ -8,6 +8,7 @@ use crate::syscalls::interfaces::SyscallInterface; use crate::{__sys_free, __sys_malloc, __sys_realloc}; pub use self::condvar::*; +pub use self::futex::*; pub use self::processor::*; pub use self::random::*; pub use self::recmutex::*; @@ -19,6 +20,7 @@ pub use self::timer::*; mod condvar; pub(crate) mod fs; +mod futex; mod interfaces; #[cfg(feature = "newlib")] mod lwip; diff --git a/src/syscalls/timer.rs b/src/syscalls/timer.rs index 8e2d2dab98..849ed1becb 100644 --- a/src/syscalls/timer.rs +++ b/src/syscalls/timer.rs @@ -39,6 +39,13 @@ fn microseconds_to_timeval(microseconds: u64, result: &mut timeval) { result.tv_usec = (microseconds % 1_000_000) as i64; } +pub(crate) fn timespec_to_microseconds(time: timespec) -> Option { + u64::try_from(time.tv_sec) + .ok() + .and_then(|secs| secs.checked_mul(1_000_000)) + .and_then(|millions| millions.checked_add(u64::try_from(time.tv_nsec).ok()? / 1000)) +} + extern "C" fn __sys_clock_getres(clock_id: u64, res: *mut timespec) -> i32 { assert!( !res.is_null(), diff --git a/tests/thread.rs b/tests/thread.rs index 70884e3e2f..785544fd34 100644 --- a/tests/thread.rs +++ b/tests/thread.rs @@ -8,11 +8,19 @@ extern crate hermit; +use core::{ + ptr, + sync::atomic::{AtomicU32, Ordering::Relaxed}, +}; + use common::*; mod common; use alloc::vec; -use hermit::{sys_join, sys_spawn2, sys_usleep}; +use hermit::{ + errno::{EAGAIN, ETIMEDOUT}, + sys_futex_wait, sys_futex_wake, sys_join, sys_spawn2, sys_usleep, timespec, +}; const USER_STACK_SIZE: usize = 1_048_576; const NORMAL_PRIO: u8 = 2; @@ -40,6 +48,48 @@ pub fn thread_test() { } } +extern "C" fn waker_func(futex: usize) { + let futex = unsafe { &*(futex as *const AtomicU32) }; + + sys_usleep(100_000); + + futex.store(1, Relaxed); + let ret = sys_futex_wake(futex as *const AtomicU32 as *mut u32, i32::MAX); + assert_eq!(ret, 1); +} + +#[test_case] +pub fn test_futex() { + let futex = AtomicU32::new(0); + let futex_ptr = &futex as *const AtomicU32 as *mut u32; + + let ret = sys_futex_wait(futex_ptr, 1, ptr::null(), 0); + assert_eq!(ret, -EAGAIN); + + let timeout = timespec { + tv_sec: 0, + tv_nsec: 100_000_000, + }; + let ret = sys_futex_wait(futex_ptr, 0, &timeout, 1); + assert_eq!(ret, -ETIMEDOUT); + + let waker = sys_spawn2( + waker_func, + futex_ptr as usize, + NORMAL_PRIO, + USER_STACK_SIZE, + -1, + ); + assert!(waker >= 0); + + let ret = sys_futex_wait(futex_ptr, 0, ptr::null(), 0); + assert_eq!(ret, 0); + assert_eq!(futex.load(Relaxed), 1); + + let ret = sys_join(waker); + assert_eq!(ret, 0); +} + #[test_case] pub fn test_thread_local() { #[repr(C, align(0x1000))]