diff --git a/CHANGELOG.md b/CHANGELOG.md index d558de4c..96c78ab3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ reg_size as a public method. userspace MSR handling. - [[#246](https://github.com/rust-vmm/kvm-ioctls/pull/246)] Add support for userspace NMI injection (`KVM_NMI` ioctl). +- [[#244](https://github.com/rust-vmm/kvm-ioctls/pull/244)] Add support for + coalesced MMIO (`KVM_CAP_COALESCED_MMIO` / `KVM_CAP_COALESCED_PIO`). # v0.15.0 diff --git a/src/cap.rs b/src/cap.rs index b8bfd15b..71b71817 100644 --- a/src/cap.rs +++ b/src/cap.rs @@ -158,6 +158,7 @@ pub enum Cap { DebugHwBps = KVM_CAP_GUEST_DEBUG_HW_BPS, DebugHwWps = KVM_CAP_GUEST_DEBUG_HW_WPS, GetMsrFeatures = KVM_CAP_GET_MSR_FEATURES, + CoalescedPio = KVM_CAP_COALESCED_PIO, #[cfg(target_arch = "aarch64")] ArmSve = KVM_CAP_ARM_SVE, #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] diff --git a/src/ioctls/mod.rs b/src/ioctls/mod.rs index 9079acc7..e3ed2f74 100644 --- a/src/ioctls/mod.rs +++ b/src/ioctls/mod.rs @@ -5,10 +5,13 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. +use std::mem::size_of; use std::os::unix::io::AsRawFd; use std::ptr::null_mut; -use kvm_bindings::kvm_run; +use kvm_bindings::{ + kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_run, KVM_COALESCED_MMIO_PAGE_OFFSET, +}; use vmm_sys_util::errno; /// Wrappers over KVM device ioctls. @@ -26,6 +29,100 @@ pub mod vm; /// is otherwise a direct mapping to Result. pub type Result<T> = std::result::Result<T, errno::Error>; +/// A wrapper around the coalesced MMIO ring page. +#[derive(Debug)] +pub(crate) struct KvmCoalescedIoRing { + addr: *mut kvm_coalesced_mmio_ring, + page_size: usize, +} + +impl KvmCoalescedIoRing { + /// Maps the coalesced MMIO ring from the vCPU file descriptor. + pub(crate) fn mmap_from_fd<F: AsRawFd>(fd: &F) -> Result<Self> { + // SAFETY: We trust the sysconf libc function and we're calling it + // with a correct parameter. 
+ let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } { + -1 => return Err(errno::Error::last()), + ps => ps as usize, + }; + + let offset = KVM_COALESCED_MMIO_PAGE_OFFSET * page_size as u32; + // SAFETY: KVM guarantees that there is a page at offset + // KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE if the appropriate + // capability is available. If it is not, the call will simply + // fail. + let addr = unsafe { + libc::mmap( + null_mut(), + page_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + fd.as_raw_fd(), + offset.into(), + ) + }; + if addr == libc::MAP_FAILED { + return Err(errno::Error::last()); + } + Ok(Self { + addr: addr.cast(), + page_size, + }) + } + + /// Compute the size of the MMIO ring. + /// Taken from [include/uapi/linux/kvm.h](https://elixir.bootlin.com/linux/v6.6/source/include/uapi/linux/kvm.h#L562) + const fn ring_max(&self) -> usize { + (self.page_size - size_of::<kvm_coalesced_mmio_ring>()) / size_of::<kvm_coalesced_mmio>() + } + + /// Gets a mutable reference to the ring + fn ring_mut(&mut self) -> &mut kvm_coalesced_mmio_ring { + // SAFETY: We have a `&mut self` and the pointer is private, so this + // access is exclusive. + unsafe { &mut *self.addr } + } + + /// Reads a single entry from the MMIO ring. + /// + /// # Returns + /// + /// An entry from the MMIO ring buffer, or [`None`] if the ring is empty. + pub(crate) fn read_entry(&mut self) -> Option<kvm_coalesced_mmio> { + let ring_max = self.ring_max(); + + let ring = self.ring_mut(); + if ring.first == ring.last { + return None; + } + + let entries = ring.coalesced_mmio.as_ptr(); + // SAFETY: `ring.first` is a `u32` coming from mapped memory filled + // by the kernel, so we trust it. `entries` is a pointer coming from + // mmap(), so pointer arithmetic cannot overflow. We have a `&mut self`, + // so nobody else has access to the contents of the pointer. 
+ let elem = unsafe { entries.add(ring.first as usize).read() }; + ring.first = (ring.first + 1) % ring_max as u32; + + Some(elem) + } +} + +impl Drop for KvmCoalescedIoRing { + fn drop(&mut self) { + // SAFETY: This is safe because we mmap the page ourselves, and nobody + // else is holding a reference to it. + unsafe { + libc::munmap(self.addr.cast(), self.page_size); + } + } +} + +// SAFETY: See safety comments about [`KvmRunWrapper`]. +unsafe impl Send for KvmCoalescedIoRing {} +// SAFETY: See safety comments about [`KvmRunWrapper`]. +unsafe impl Sync for KvmCoalescedIoRing {} + /// Safe wrapper over the `kvm_run` struct. /// /// The wrapper is needed for sending the pointer to `kvm_run` between diff --git a/src/ioctls/vcpu.rs b/src/ioctls/vcpu.rs index 1154dfca..c3097423 100644 --- a/src/ioctls/vcpu.rs +++ b/src/ioctls/vcpu.rs @@ -10,7 +10,7 @@ use libc::EINVAL; use std::fs::File; use std::os::unix::io::{AsRawFd, RawFd}; -use crate::ioctls::{KvmRunWrapper, Result}; +use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result}; use crate::kvm_ioctls::*; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use kvm_bindings::{ @@ -169,6 +169,8 @@ pub enum VcpuExit<'a> { pub struct VcpuFd { vcpu: File, kvm_run_ptr: KvmRunWrapper, + /// A pointer to the coalesced MMIO page + coalesced_mmio_ring: Option<KvmCoalescedIoRing>, } /// KVM Sync Registers used to tell KVM which registers to sync @@ -1849,6 +1851,55 @@ impl VcpuFd { _ => Err(errno::Error::last()), } } + + /// Maps the coalesced MMIO ring page. This allows reading entries from + /// the ring via [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read). + /// + /// # Returns + /// + /// Returns an error if the buffer could not be mapped, usually because + /// `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio)) + /// is not available. 
+ /// + /// # Examples + /// + /// ```rust + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::CoalescedMmio) { + /// vcpu.map_coalesced_mmio_ring().unwrap(); + /// } + /// ``` + pub fn map_coalesced_mmio_ring(&mut self) -> Result<()> { + if self.coalesced_mmio_ring.is_none() { + let ring = KvmCoalescedIoRing::mmap_from_fd(&self.vcpu)?; + self.coalesced_mmio_ring = Some(ring); + } + Ok(()) + } + + /// Read a single entry from the coalesced MMIO ring. + /// For entries to be appended to the ring by the kernel, addresses must be registered + /// via [`VmFd::register_coalesced_mmio()`](crate::VmFd::register_coalesced_mmio()). + /// + /// [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) must have been called beforehand. + /// + /// See the documentation for `KVM_(UN)REGISTER_COALESCED_MMIO`. + /// + /// # Returns + /// + /// * An error if [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) + /// was not called beforehand. + /// * [`Ok<None>`] if the ring is empty. + /// * [`Ok<Some<kvm_coalesced_mmio>>`] if an entry was successfully read. + pub fn coalesced_mmio_read(&mut self) -> Result<Option<kvm_coalesced_mmio>> { + self.coalesced_mmio_ring + .as_mut() + .ok_or(errno::Error::new(libc::EIO)) + .map(|ring| ring.read_entry()) + } } /// Helper function to create a new `VcpuFd`. @@ -1857,7 +1908,11 @@ impl VcpuFd { /// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because /// then it would be exported with the public `VcpuFd` interface. 
pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd { - VcpuFd { vcpu, kvm_run_ptr } + VcpuFd { + vcpu, + kvm_run_ptr, + coalesced_mmio_ring: None, + } } impl AsRawFd for VcpuFd { @@ -2440,6 +2495,7 @@ mod tests { kvm_run_ptr: mmap_anonymous(10), mmap_size: 10, }, + coalesced_mmio_ring: None, }; assert_eq!(faulty_vcpu_fd.get_regs().unwrap_err().errno(), badf_errno); diff --git a/src/ioctls/vm.rs b/src/ioctls/vm.rs index 2663cb42..813c490c 100644 --- a/src/ioctls/vm.rs +++ b/src/ioctls/vm.rs @@ -1562,6 +1562,68 @@ impl VmFd { Err(errno::Error::last()) } } + + /// Registers an address for coalesced MMIO. Write accesses to the address + /// will not cause a corresponding [`VcpuExit`](crate::VcpuExit), but + /// instead will be appended to the MMIO ring buffer. The [`VcpuFd`] can + /// read entries in the ring buffer via [`VcpuFd::coalesced_mmio_read()`]. + /// If entries are not read, the buffer will eventually be full, + /// preventing further elements from being appended by the kernel. + /// + /// Needs `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio)) + /// and/or `KVM_CAP_COALESCED_PIO` ([`Cap::CoalescedPio`](crate::Cap::CoalescedPio)). + /// + /// See the documentation for `KVM_REGISTER_COALESCED_MMIO`. + /// + /// # Arguments + /// + /// * `addr` - Address being written to. + /// * `size` - The size of the write for the mechanism to trigger. + pub fn register_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> { + let (addr, pio) = match addr { + IoEventAddress::Pio(addr) => (addr, 1), + IoEventAddress::Mmio(addr) => (addr, 0), + }; + let mut zone = kvm_coalesced_mmio_zone { + addr, + size, + ..Default::default() + }; + zone.__bindgen_anon_1.pio = pio; + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_REGISTER_COALESCED_MMIO(), &zone) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Unregister an address that was previously registered via + /// [`register_coalesced_mmio()`](VmFd::register_coalesced_mmio). + /// + /// See the documentation for `KVM_UNREGISTER_COALESCED_MMIO`. + pub fn unregister_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> { + let (addr, pio) = match addr { + IoEventAddress::Pio(addr) => (addr, 1), + IoEventAddress::Mmio(addr) => (addr, 0), + }; + let mut zone = kvm_coalesced_mmio_zone { + addr, + size, + ..Default::default() + }; + zone.__bindgen_anon_1.pio = pio; + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_UNREGISTER_COALESCED_MMIO(), &zone) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } } /// Helper function to create a new `VmFd`. diff --git a/src/kvm_ioctls.rs b/src/kvm_ioctls.rs index a1737782..397bb14d 100644 --- a/src/kvm_ioctls.rs +++ b/src/kvm_ioctls.rs @@ -61,6 +61,20 @@ ioctl_io_nr!(KVM_CREATE_IRQCHIP, KVMIO, 0x60); target_arch = "aarch64" ))] ioctl_iow_nr!(KVM_IRQ_LINE, KVMIO, 0x61, kvm_irq_level); +/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO */ +ioctl_iow_nr!( + KVM_REGISTER_COALESCED_MMIO, + KVMIO, + 0x67, + kvm_coalesced_mmio_zone +); +/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO */ +ioctl_iow_nr!( + KVM_UNREGISTER_COALESCED_MMIO, + KVMIO, + 0x68, + kvm_coalesced_mmio_zone +); /* Available with KVM_CAP_IRQ_ROUTING */ #[cfg(any( target_arch = "x86",