Skip to content

Commit

Permalink
Add support for coalesced MMIO (KVM_CAP_COALESCED_MMIO)
Browse files Browse the repository at this point in the history
Add support for coalesced MMIO. This performance feature allows guest
writes to port and memory space to not trigger VM exits. Instead, the
kernel will write an entry into a shared ring buffer for each access,
which userspace must consume. The ring buffer is mapped at a certain
offset in the vcpu's file descriptor.

In order to enable this capability, introduce the KvmCoalescedIoRing
struct, which will act as a safe wrapper around the raw mapping of
the ring buffer. Since users may not use coalesced MMIO, or it might
not be available, store it as an Option in the VcpuFd struct.

Signed-off-by: Carlos López <carlos.lopez@suse.com>
  • Loading branch information
00xc authored and JonathanWoollett-Light committed Dec 15, 2023
1 parent 2a102e7 commit b6604a0
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ reg_size as a public method.
userspace MSR handling.
- [[#246](https://github.com/rust-vmm/kvm-ioctls/pull/246)] Add support for
userspace NMI injection (`KVM_NMI` ioctl).
- [[#244](https://github.com/rust-vmm/kvm-ioctls/pull/244)] Add support for
coalesced MMIO (`KVM_CAP_COALESCED_MMIO` / `KVM_CAP_COALESCED_PIO`).

# v0.15.0

Expand Down
1 change: 1 addition & 0 deletions src/cap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ pub enum Cap {
DebugHwBps = KVM_CAP_GUEST_DEBUG_HW_BPS,
DebugHwWps = KVM_CAP_GUEST_DEBUG_HW_WPS,
GetMsrFeatures = KVM_CAP_GET_MSR_FEATURES,
CoalescedPio = KVM_CAP_COALESCED_PIO,
#[cfg(target_arch = "aarch64")]
ArmSve = KVM_CAP_ARM_SVE,
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Expand Down
99 changes: 98 additions & 1 deletion src/ioctls/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

use std::mem::size_of;
use std::os::unix::io::AsRawFd;
use std::ptr::null_mut;

use kvm_bindings::kvm_run;
use kvm_bindings::{
kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_run, KVM_COALESCED_MMIO_PAGE_OFFSET,
};
use vmm_sys_util::errno;

/// Wrappers over KVM device ioctls.
Expand All @@ -26,6 +29,100 @@ pub mod vm;
/// is otherwise a direct mapping to Result.
pub type Result<T> = std::result::Result<T, errno::Error>;

/// A wrapper around the coalesced MMIO ring page.
///
/// The page is mmap()ed from a vCPU file descriptor at offset
/// `KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE` and unmapped on drop.
#[derive(Debug)]
pub(crate) struct KvmCoalescedIoRing {
    // Pointer to the mmap()ed ring page. Owned exclusively by this struct;
    // the kernel appends entries to the ring behind this mapping.
    addr: *mut kvm_coalesced_mmio_ring,
    // Size of the mapping (one system page); kept so the mapping can be
    // munmap()ed on drop.
    page_size: usize,
}

impl KvmCoalescedIoRing {
    /// Maps the coalesced MMIO ring from the vCPU file descriptor.
    ///
    /// Fails if the page size cannot be queried or if the mmap() itself
    /// fails (e.g. when `KVM_CAP_COALESCED_MMIO` is unavailable).
    pub(crate) fn mmap_from_fd<F: AsRawFd>(fd: &F) -> Result<Self> {
        // SAFETY: We trust the sysconf libc function and we're calling it
        // with a correct parameter.
        let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } {
            -1 => return Err(errno::Error::last()),
            ps => ps as usize,
        };

        // Byte offset of the ring page within the vCPU fd's mmap space.
        let offset = KVM_COALESCED_MMIO_PAGE_OFFSET * page_size as u32;
        // SAFETY: KVM guarantees that there is a page at offset
        // KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE if the appropriate
        // capability is available. If it is not, the call will simply
        // fail.
        let addr = unsafe {
            libc::mmap(
                null_mut(),
                page_size,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_SHARED,
                fd.as_raw_fd(),
                offset.into(),
            )
        };
        if addr == libc::MAP_FAILED {
            return Err(errno::Error::last());
        }
        Ok(Self {
            addr: addr.cast(),
            page_size,
        })
    }

    /// Computes the maximum number of entries the MMIO ring can hold:
    /// the space left in the page after the ring header, divided by the
    /// size of one entry.
    /// Taken from [include/uapi/linux/kvm.h](https://elixir.bootlin.com/linux/v6.6/source/include/uapi/linux/kvm.h#L562)
    const fn ring_max(&self) -> usize {
        (self.page_size - size_of::<kvm_coalesced_mmio_ring>()) / size_of::<kvm_coalesced_mmio>()
    }

    /// Gets a mutable reference to the ring header at the start of the page.
    fn ring_mut(&mut self) -> &mut kvm_coalesced_mmio_ring {
        // SAFETY: We have a `&mut self` and the pointer is private, so this
        // access is exclusive.
        unsafe { &mut *self.addr }
    }

    /// Reads a single entry from the MMIO ring.
    ///
    /// Advances the ring's `first` index (consumer side) after reading,
    /// which tells the kernel the slot is free again.
    ///
    /// # Returns
    ///
    /// An entry from the MMIO ring buffer, or [`None`] if the ring is empty
    /// (producer index `last` equals consumer index `first`).
    pub(crate) fn read_entry(&mut self) -> Option<kvm_coalesced_mmio> {
        let ring_max = self.ring_max();

        let ring = self.ring_mut();
        if ring.first == ring.last {
            return None;
        }

        // Entries are laid out in the page right after the ring header.
        let entries = ring.coalesced_mmio.as_ptr();
        // SAFETY: `ring.first` is an `u32` coming from mapped memory filled
        // by the kernel, so we trust it. `entries` is a pointer coming from
        // mmap(), so pointer arithmetic cannot overflow. We have a `&mut self`,
        // so nobody else has access to the contents of the pointer.
        let elem = unsafe { entries.add(ring.first as usize).read() };
        // Consume the slot: advance `first` with wrap-around at ring_max.
        ring.first = (ring.first + 1) % ring_max as u32;

        Some(elem)
    }
}

impl Drop for KvmCoalescedIoRing {
    /// Unmaps the ring page when the wrapper is dropped.
    fn drop(&mut self) {
        // SAFETY: This is safe because we mmap the page ourselves, and nobody
        // else is holding a reference to it.
        // The munmap() return value is deliberately ignored: there is no
        // sensible way to report a failure from drop().
        unsafe {
            libc::munmap(self.addr.cast(), self.page_size);
        }
    }
}

// SAFETY: The wrapped pointer refers to a page mapped from the vCPU fd and
// owned exclusively by this struct; mutation requires `&mut self`. See also
// the safety comments about [`KvmRunWrapper`].
unsafe impl Send for KvmCoalescedIoRing {}
// SAFETY: See safety comments about [`KvmRunWrapper`].
unsafe impl Sync for KvmCoalescedIoRing {}

/// Safe wrapper over the `kvm_run` struct.
///
/// The wrapper is needed for sending the pointer to `kvm_run` between
Expand Down
60 changes: 58 additions & 2 deletions src/ioctls/vcpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use libc::EINVAL;
use std::fs::File;
use std::os::unix::io::{AsRawFd, RawFd};

use crate::ioctls::{KvmRunWrapper, Result};
use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result};
use crate::kvm_ioctls::*;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use kvm_bindings::{
Expand Down Expand Up @@ -169,6 +169,8 @@ pub enum VcpuExit<'a> {
pub struct VcpuFd {
    // File descriptor for the vCPU.
    vcpu: File,
    // Safe wrapper over the shared `kvm_run` mapping.
    kvm_run_ptr: KvmRunWrapper,
    /// The mapped coalesced MMIO ring page, if it has been mapped via
    /// `map_coalesced_mmio_ring()`; `None` otherwise.
    coalesced_mmio_ring: Option<KvmCoalescedIoRing>,
}

/// KVM Sync Registers used to tell KVM which registers to sync
Expand Down Expand Up @@ -1849,6 +1851,55 @@ impl VcpuFd {
_ => Err(errno::Error::last()),
}
}

/// Maps the coalesced MMIO ring page, enabling reads from the ring via
/// [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read).
///
/// Calling this more than once is a no-op: an already-mapped ring is kept.
///
/// # Returns
///
/// Returns an error if the buffer could not be mapped, usually because
/// `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio))
/// is not available.
///
/// # Examples
///
/// ```rust
/// # use kvm_ioctls::{Kvm, Cap};
/// let kvm = Kvm::new().unwrap();
/// let vm = kvm.create_vm().unwrap();
/// let mut vcpu = vm.create_vcpu(0).unwrap();
/// if kvm.check_extension(Cap::CoalescedMmio) {
///     vcpu.map_coalesced_mmio_ring().unwrap();
/// }
/// ```
pub fn map_coalesced_mmio_ring(&mut self) -> Result<()> {
    match self.coalesced_mmio_ring {
        // Already mapped: mapping again would leak the previous page.
        Some(_) => Ok(()),
        None => {
            self.coalesced_mmio_ring = Some(KvmCoalescedIoRing::mmap_from_fd(&self.vcpu)?);
            Ok(())
        }
    }
}

/// Read a single entry from the coalesced MMIO ring.
/// For entries to be appended to the ring by the kernel, addresses must be registered
/// via [`VmFd::register_coalesced_mmio()`](crate::VmFd::register_coalesced_mmio()).
///
/// [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring) must have been called beforehand.
///
/// See the documentation for `KVM_(UN)REGISTER_COALESCED_MMIO`.
///
/// # Returns
///
/// * An error if [`map_coalesced_mmio_ring()`](VcpuFd::map_coalesced_mmio_ring)
/// was not called beforehand.
/// * [`Ok<None>`] if the ring is empty.
/// * [`Ok<Some<kvm_coalesced_mmio>>`] if an entry was successfully read.
pub fn coalesced_mmio_read(&mut self) -> Result<Option<kvm_coalesced_mmio>> {
self.coalesced_mmio_ring
.as_mut()
.ok_or(errno::Error::new(libc::EIO))
.map(|ring| ring.read_entry())
}
}

/// Helper function to create a new `VcpuFd`.
Expand All @@ -1857,7 +1908,11 @@ impl VcpuFd {
/// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because
/// then it would be exported with the public `VcpuFd` interface.
pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd {
    // The coalesced MMIO ring is not mapped up front; it is only mapped
    // on demand via `VcpuFd::map_coalesced_mmio_ring()`.
    let coalesced_mmio_ring = None;
    VcpuFd {
        vcpu,
        kvm_run_ptr,
        coalesced_mmio_ring,
    }
}

impl AsRawFd for VcpuFd {
Expand Down Expand Up @@ -2440,6 +2495,7 @@ mod tests {
kvm_run_ptr: mmap_anonymous(10),
mmap_size: 10,
},
coalesced_mmio_ring: None,
};

assert_eq!(faulty_vcpu_fd.get_regs().unwrap_err().errno(), badf_errno);
Expand Down
62 changes: 62 additions & 0 deletions src/ioctls/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1562,6 +1562,68 @@ impl VmFd {
Err(errno::Error::last())
}
}

/// Registers an address for coalesced MMIO. Write accesses to the address
/// will not cause a corresponding [`VcpuExit`](crate::VcpuExit), but
/// instead will be appended to the MMIO ring buffer. The [`VcpuFd`] can
/// read entries in the ring buffer via [`VcpuFd::coalesced_mmio_read()`].
/// If entries are not read the buffer will eventually be full,
/// preventing further elements from being appended by the kernel.
///
/// Needs `KVM_CAP_COALESCED_MMIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedMmio))
/// and/or `KVM_CAP_COALESCED_PIO` ([`Cap::CoalescedMmio`](crate::Cap::CoalescedPio)).
///
/// See the documentation for `KVM_REGISTER_COALESCED_MMIO`.
///
/// # Arguments
///
/// * `addr` - Address being written to.
/// * `size` - The size of the write for the mechanism to trigger.
pub fn register_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> {
let (addr, pio) = match addr {
IoEventAddress::Pio(addr) => (addr, 1),
IoEventAddress::Mmio(addr) => (addr, 0),
};
let mut zone = kvm_coalesced_mmio_zone {
addr,
size,
..Default::default()
};
zone.__bindgen_anon_1.pio = pio;

// SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
// the correct amount of memory from our pointer, and we verify the return result.
let ret = unsafe { ioctl_with_ref(self, KVM_REGISTER_COALESCED_MMIO(), &zone) };
if ret != 0 {
return Err(errno::Error::last());
}
Ok(())
}

/// Unregister an address that was previously registered via
/// [`register_coalesced_mmio()`](VmFd::register_coalesced_mmio).
///
/// See the documentation for `KVM_UNREGISTER_COALESCED_MMIO`.
pub fn unregister_coalesced_mmio(&self, addr: IoEventAddress, size: u32) -> Result<()> {
let (addr, pio) = match addr {
IoEventAddress::Pio(addr) => (addr, 1),
IoEventAddress::Mmio(addr) => (addr, 0),
};
let mut zone = kvm_coalesced_mmio_zone {
addr,
size,
..Default::default()
};
zone.__bindgen_anon_1.pio = pio;

// SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read
// the correct amount of memory from our pointer, and we verify the return result.
let ret = unsafe { ioctl_with_ref(self, KVM_UNREGISTER_COALESCED_MMIO(), &zone) };
if ret != 0 {
return Err(errno::Error::last());
}
Ok(())
}
}

/// Helper function to create a new `VmFd`.
Expand Down
14 changes: 14 additions & 0 deletions src/kvm_ioctls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,20 @@ ioctl_io_nr!(KVM_CREATE_IRQCHIP, KVMIO, 0x60);
target_arch = "aarch64"
))]
ioctl_iow_nr!(KVM_IRQ_LINE, KVMIO, 0x61, kvm_irq_level);
/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO.
 * Registers a guest address range whose writes are coalesced into the
 * per-vCPU ring buffer instead of triggering VM exits. */
ioctl_iow_nr!(
    KVM_REGISTER_COALESCED_MMIO,
    KVMIO,
    0x67,
    kvm_coalesced_mmio_zone
);
/* Available with KVM_CAP_COALESCED_MMIO / KVM_CAP_COALESCED_PIO.
 * Removes a previously registered coalesced MMIO/PIO address range. */
ioctl_iow_nr!(
    KVM_UNREGISTER_COALESCED_MMIO,
    KVMIO,
    0x68,
    kvm_coalesced_mmio_zone
);
/* Available with KVM_CAP_IRQ_ROUTING */
#[cfg(any(
target_arch = "x86",
Expand Down

0 comments on commit b6604a0

Please sign in to comment.