From b6604a089d73d8bbd89edcb60e2659b833371bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20L=C3=B3pez?= Date: Tue, 21 Nov 2023 11:33:07 +0100 Subject: [PATCH] Add support for coalesced MMIO (KVM_CAP_COALESCED_MMIO) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for coalesced MMIO. This performance feature allows guest writes to port and memory space to not trigger VM exits. Instead, the kernel will write an entry into a shared ring buffer for each access, which userspace must consume. The ring buffer is mapped at a certain offset in the vcpu's file descriptor. In order to enable this capability, introduce the KvmCoalescedIoRing struct, which will act as a safe wrapper around the raw mapping of the ring buffer. Since users may not use coalesced MMIO, or it might not be available, store it as an Option in the VcpuFd struct. Signed-off-by: Carlos López --- CHANGELOG.md | 2 + src/cap.rs | 1 + src/ioctls/mod.rs | 99 +++++++++++++++++++++++++++++++++++++++++++++- src/ioctls/vcpu.rs | 60 +++++++++++++++++++++++++++- src/ioctls/vm.rs | 62 +++++++++++++++++++++++++++++ src/kvm_ioctls.rs | 14 +++++++ 6 files changed, 235 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d558de4c..96c78ab3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ reg_size as a public method. userspace MSR handling. - [[#246](https://github.com/rust-vmm/kvm-ioctls/pull/246)] Add support for userspace NMI injection (`KVM_NMI` ioctl). 
+- [[#244](https://github.com/rust-vmm/kvm-ioctls/pull/244)] add support for + coalesced MMIO (`KVM_CAP_COALESCED_MMIO` / `KVM_CAP_COALESCED_PIO`) # v0.15.0 diff --git a/src/cap.rs b/src/cap.rs index b8bfd15b..71b71817 100644 --- a/src/cap.rs +++ b/src/cap.rs @@ -158,6 +158,7 @@ pub enum Cap { DebugHwBps = KVM_CAP_GUEST_DEBUG_HW_BPS, DebugHwWps = KVM_CAP_GUEST_DEBUG_HW_WPS, GetMsrFeatures = KVM_CAP_GET_MSR_FEATURES, + CoalescedPio = KVM_CAP_COALESCED_PIO, #[cfg(target_arch = "aarch64")] ArmSve = KVM_CAP_ARM_SVE, #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] diff --git a/src/ioctls/mod.rs b/src/ioctls/mod.rs index 9079acc7..e3ed2f74 100644 --- a/src/ioctls/mod.rs +++ b/src/ioctls/mod.rs @@ -5,10 +5,13 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. +use std::mem::size_of; use std::os::unix::io::AsRawFd; use std::ptr::null_mut; -use kvm_bindings::kvm_run; +use kvm_bindings::{ + kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_run, KVM_COALESCED_MMIO_PAGE_OFFSET, +}; use vmm_sys_util::errno; /// Wrappers over KVM device ioctls. @@ -26,6 +29,100 @@ pub mod vm; /// is otherwise a direct mapping to Result. pub type Result<T> = std::result::Result<T, errno::Error>; +/// A wrapper around the coalesced MMIO ring page. +#[derive(Debug)] +pub(crate) struct KvmCoalescedIoRing { + addr: *mut kvm_coalesced_mmio_ring, + page_size: usize, +} + +impl KvmCoalescedIoRing { + /// Maps the coalesced MMIO ring from the vCPU file descriptor. + pub(crate) fn mmap_from_fd<F: AsRawFd>(fd: &F) -> Result<Self> { + // SAFETY: We trust the sysconf libc function and we're calling it + // with a correct parameter. 
+ let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } { + -1 => return Err(errno::Error::last()), + ps => ps as usize, + }; + + let offset = KVM_COALESCED_MMIO_PAGE_OFFSET * page_size as u32; + // SAFETY: KVM guarantees that there is a page at offset + // KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE if the appropriate + // capability is available. If it is not, the call will simply + // fail. + let addr = unsafe { + libc::mmap( + null_mut(), + page_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + fd.as_raw_fd(), + offset.into(), + ) + }; + if addr == libc::MAP_FAILED { + return Err(errno::Error::last()); + } + Ok(Self { + addr: addr.cast(), + page_size, + }) + } + + /// Compute the size of the MMIO ring. + /// Taken from [include/uapi/linux/kvm.h](https://elixir.bootlin.com/linux/v6.6/source/include/uapi/linux/kvm.h#L562) + const fn ring_max(&self) -> usize { + (self.page_size - size_of::<kvm_coalesced_mmio_ring>()) / size_of::<kvm_coalesced_mmio>() + } + + /// Gets a mutable reference to the ring + fn ring_mut(&mut self) -> &mut kvm_coalesced_mmio_ring { + // SAFETY: We have a `&mut self` and the pointer is private, so this + // access is exclusive. + unsafe { &mut *self.addr } + } + + /// Reads a single entry from the MMIO ring. + /// + /// # Returns + /// + /// An entry from the MMIO ring buffer, or [`None`] if the ring is empty. + pub(crate) fn read_entry(&mut self) -> Option<kvm_coalesced_mmio> { + let ring_max = self.ring_max(); + + let ring = self.ring_mut(); + if ring.first == ring.last { + return None; + } + + let entries = ring.coalesced_mmio.as_ptr(); + // SAFETY: `ring.first` is an `u32` coming from mapped memory filled + // by the kernel, so we trust it. `entries` is a pointer coming from + // mmap(), so pointer arithmetic cannot overflow. We have a `&mut self`, + // so nobody else has access to the contents of the pointer. 
+ let elem = unsafe { entries.add(ring.first as usize).read() }; + ring.first = (ring.first + 1) % ring_max as u32; + + Some(elem) + } +} + +impl Drop for KvmCoalescedIoRing { + fn drop(&mut self) { + // SAFETY: This is safe because we mmap the page ourselves, and nobody + // else is holding a reference to it. + unsafe { + libc::munmap(self.addr.cast(), self.page_size); + } + } +} + +// SAFETY: See safety comments about [`KvmRunWrapper`]. +unsafe impl Send for KvmCoalescedIoRing {} +// SAFETY: See safety comments about [`KvmRunWrapper`]. +unsafe impl Sync for KvmCoalescedIoRing {} + /// Safe wrapper over the `kvm_run` struct. /// /// The wrapper is needed for sending the pointer to `kvm_run` between diff --git a/src/ioctls/vcpu.rs b/src/ioctls/vcpu.rs index 1154dfca..c3097423 100644 --- a/src/ioctls/vcpu.rs +++ b/src/ioctls/vcpu.rs @@ -10,7 +10,7 @@ use libc::EINVAL; use std::fs::File; use std::os::unix::io::{AsRawFd, RawFd}; -use crate::ioctls::{KvmRunWrapper, Result}; +use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result}; use crate::kvm_ioctls::*; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use kvm_bindings::{ @@ -169,6 +169,8 @@ pub enum VcpuExit<'a> { pub struct VcpuFd { vcpu: File, kvm_run_ptr: KvmRunWrapper, + /// A pointer to the coalesced MMIO page + coalesced_mmio_ring: Option<KvmCoalescedIoRing>, } /// KVM Sync Registers used to tell KVM which registers to sync @@ -1849,6 +1851,55 @@ impl VcpuFd { _ => Err(errno::Error::last()), } } + + /// Maps the coalesced MMIO ring page. This allows reading entries from + /// the ring via [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read). + /// + /// # Returns + /// + /// Returns an error if the buffer could not be mapped, usually because + /// `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio)) + /// is not available. 
+ /// + /// # Examples + /// + /// ```rust + /// # use kvm_ioctls::{Kvm, Cap}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::CoalescedMmio) { + /// vcpu.map_coalesced_mmio_ring().unwrap(); + /// } + /// ``` + pub fn map_coalesced_mmio_ring(&mut self) -> Result<()> { + if self.coalesced_mmio_ring.is_none() { + let ring = KvmCoalescedIoRing::mmap_from_fd(&self.vcpu)?; + self.coalesced_mmio_ring = Some(ring); + } + Ok(()) + } + + /// Read a single entry from the coalesced MMIO ring. + /// For entries to be appended to the ring by the kernel, addresses must be registered + /// via [`VmFd::register_coalesced_mmio()`](crate::VmFd::register_coalesced_mmio()). + /// + /// [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) must have been called beforehand. + /// + /// See the documentation for `KVM_(UN)REGISTER_COALESCED_MMIO`. + /// + /// # Returns + /// + /// * An error if [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) + /// was not called beforehand. + /// * [`Ok<None>`] if the ring is empty. + /// * [`Ok<Some<kvm_coalesced_mmio>>`] if an entry was successfully read. + pub fn coalesced_mmio_read(&mut self) -> Result<Option<kvm_coalesced_mmio>> { + self.coalesced_mmio_ring + .as_mut() + .ok_or(errno::Error::new(libc::EIO)) + .map(|ring| ring.read_entry()) + } } /// Helper function to create a new `VcpuFd`. @@ -1857,7 +1908,11 @@ impl VcpuFd { /// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because /// then it would be exported with the public `VcpuFd` interface. 
pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd { - VcpuFd { vcpu, kvm_run_ptr } + VcpuFd { + vcpu, + kvm_run_ptr, + coalesced_mmio_ring: None, + } } impl AsRawFd for VcpuFd { @@ -2440,6 +2495,7 @@ mod tests { kvm_run_ptr: mmap_anonymous(10), mmap_size: 10, }, + coalesced_mmio_ring: None, }; assert_eq!(faulty_vcpu_fd.get_regs().unwrap_err().errno(), badf_errno); diff --git a/src/ioctls/vm.rs b/src/ioctls/vm.rs index 2663cb42..813c490c 100644 --- a/src/ioctls/vm.rs +++ b/src/ioctls/vm.rs @@ -1562,6 +1562,68 @@ impl VmFd { Err(errno::Error::last()) } } + + /// Registers an address for coalesced MMIO. Write accesses to the address + /// will not cause a corresponding [`VcpuExit`](crate::VcpuExit), but + /// instead will be appended to the MMIO ring buffer. The [`VcpuFd`] can + /// read entries in the ring buffer via [`VcpuFd::coalesced_mmio_read()`]. + /// If entries are not read the buffer will eventually be full, + /// preventing further elements from being appended by the kernel. + /// + /// Needs `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio)) + /// and/or `KVM_CAP_COALESCED_PIO` ([`Cap::CoalescedPio`](crate::Cap::CoalescedPio)). + /// + /// See the documentation for `KVM_REGISTER_COALESCED_MMIO`. + /// + /// # Arguments + /// + /// * `addr` - Address being written to. + /// * `size` - The size of the write for the mechanism to trigger. + pub fn register_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> { + let (addr, pio) = match addr { + IoEventAddress::Pio(addr) => (addr, 1), + IoEventAddress::Mmio(addr) => (addr, 0), + }; + let mut zone = kvm_coalesced_mmio_zone { + addr, + size, + ..Default::default() + }; + zone.__bindgen_anon_1.pio = pio; + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. 
+ let ret = unsafe { ioctl_with_ref(self, KVM_REGISTER_COALESCED_MMIO(), &zone) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } + + /// Unregister an address that was previously registered via + /// [`register_coalesced_mmio()`](VmFd::register_coalesced_mmio). + /// + /// See the documentation for `KVM_UNREGISTER_COALESCED_MMIO`. + pub fn unregister_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> { + let (addr, pio) = match addr { + IoEventAddress::Pio(addr) => (addr, 1), + IoEventAddress::Mmio(addr) => (addr, 0), + }; + let mut zone = kvm_coalesced_mmio_zone { + addr, + size, + ..Default::default() + }; + zone.__bindgen_anon_1.pio = pio; + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(self, KVM_UNREGISTER_COALESCED_MMIO(), &zone) }; + if ret != 0 { + return Err(errno::Error::last()); + } + Ok(()) + } } /// Helper function to create a new `VmFd`. diff --git a/src/kvm_ioctls.rs b/src/kvm_ioctls.rs index a1737782..397bb14d 100644 --- a/src/kvm_ioctls.rs +++ b/src/kvm_ioctls.rs @@ -61,6 +61,20 @@ ioctl_io_nr!(KVM_CREATE_IRQCHIP, KVMIO, 0x60); target_arch = "aarch64" ))] ioctl_iow_nr!(KVM_IRQ_LINE, KVMIO, 0x61, kvm_irq_level); +/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO */ +ioctl_iow_nr!( + KVM_REGISTER_COALESCED_MMIO, + KVMIO, + 0x67, + kvm_coalesced_mmio_zone +); +/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO */ +ioctl_iow_nr!( + KVM_UNREGISTER_COALESCED_MMIO, + KVMIO, + 0x68, + kvm_coalesced_mmio_zone +); /* Available with KVM_CAP_IRQ_ROUTING */ #[cfg(any( target_arch = "x86",