diff --git a/measureme/Cargo.toml b/measureme/Cargo.toml
index 6cda378..161556c 100644
--- a/measureme/Cargo.toml
+++ b/measureme/Cargo.toml
@@ -13,6 +13,14 @@ repository = "https://github.com/rust-lang/measureme"
 travis-ci = { repository = "rust-lang/measureme" }
 
 [dependencies]
+log = "0.4"
 parking_lot = "0.11.0"
 rustc-hash = "1.0.1"
 smallvec = "1.0"
+
+[features]
+nightly = []
+
+[target.'cfg(all(target_arch = "x86_64", target_os = "linux"))'.dependencies]
+memmap = "0.7"
+perf-event-open-sys = "1.0.1"
diff --git a/measureme/src/counters.rs b/measureme/src/counters.rs
new file mode 100644
index 0000000..6b4ab0e
--- /dev/null
+++ b/measureme/src/counters.rs
@@ -0,0 +1,976 @@
+//! Profiling counters and their implementation.
+//!
+//! # Available counters
+//!
+//! Name (for [`Counter::by_name()`]) | Counter                      | OSes  | CPUs
+//! --------------------------------- | -------                      | ----  | ----
+//! `wall-time`                       | [`WallTime`]                 | any   | any
+//! `instructions:u`                  | [`Instructions`]             | Linux | `x86_64`
+//! `instructions-minus-irqs:u`       | [`InstructionsMinusIrqs`]    | Linux | `x86_64`<br>- AMD (since K8)<br>- Intel (since Sandy Bridge)
+//! `instructions-minus-r0420:u`      | [`InstructionsMinusRaw0420`] | Linux | `x86_64`<br>- AMD (Zen)
+//!
+//! *Note: `:u` suffixes for hardware performance counters come from the Linux `perf`
+//! tool, and indicate that the counter is only active while userspace code executes
+//! (i.e. it's paused while the kernel handles syscalls, interrupts, etc.).*
+//!
+//! # Limitations and caveats
+//!
+//! *Note: for more information, also see the GitHub PR which first implemented hardware
+//! performance counter support ([#143](https://github.com/rust-lang/measureme/pull/143)).*
+//!
+//! The hardware performance counters (i.e. all counters other than `wall-time`) are limited to:
+//! * nightly Rust (gated on `features = ["nightly"]`), for `asm!`
+//! * Linux, for out-of-the-box performance counter reads from userspace
+//!   * other OSes could work through custom kernel extensions/drivers, in the future
+//! * `x86_64` CPUs, mostly due to lack of other available test hardware
+//!   * new architectures would be easier to support (on Linux) than new OSes
+//!   * easiest to add would be 32-bit `x86` (aka `i686`), which would reuse
+//!     most of the `x86_64` CPU model detection logic
+//! * specific (newer) CPU models, for certain non-standard counters
+//!   * e.g. `instructions-minus-irqs:u` requires a "hardware interrupts" (aka "IRQs")
+//!     counter, which is implemented differently between vendors / models (if at all)
+//! * single-threaded programs (counters only work on the thread they were created on)
+//!   * for profiling `rustc`, this means only "check mode" (`--emit=metadata`),
+//!     is supported currently (`-Z no-llvm-threads` could also work)
+//!   * unclear what the best approach for handling multiple threads would be
+//!   * changing the API (e.g. to require per-thread profiler handles) could result
+//!     in a more efficient implementation, but would also be less ergonomic
+//!   * profiling data from multithreaded programs would be harder to use due to
+//!     noise from synchronization mechanisms, non-deterministic work-stealing, etc.
+//!
+//! For ergonomic reasons, the public API doesn't vary based on `features` or target.
+//! Instead, attempting to create any unsupported counter will return `Err`, just
+//! like it does for any issue detected at runtime (e.g. incompatible CPU model).
+//!
+//! When counting instructions specifically, these factors will impact the profiling quality:
+//! * high-level non-determinism (e.g. user interactions, networking)
+//!   * the ideal use-case is a mostly-deterministic program, e.g. a compiler like `rustc`
+//!   * if I/O can be isolated to separate profiling events, and doesn't impact
+//!     execution in a more subtle way (see below), the deterministic parts of
+//!     the program can still be profiled with high accuracy
+//! * low-level non-determinism (e.g. ASLR, randomized `HashMap`s, thread scheduling)
+//!   * ASLR ("Address Space Layout Randomization"), may be provided by the OS for
+//!     security reasons, or accidentally caused through allocations that depend on
+//!     random data (even as low-entropy as e.g. the base 10 length of a process ID)
+//!   * on Linux ASLR can be disabled by running the process under `setarch -R`
+//!   * this impacts `rustc` and LLVM, which rely on keying `HashMap`s by addresses
+//!     (typically of interned data) as an optimization, and while non-determinstic
+//!     outputs are considered bugs, the instructions executed can still vary a lot,
+//!     even when the externally observable behavior is perfectly repeatable
+//!   * `HashMap`s are involved in one more than one way:
+//!     * both the executed instructions, and the shape of the allocations depend
+//!       on both the hasher state and choice of keys (as the buckets are in
+//!       a flat array indexed by some of the lower bits of the key hashes)
+//!     * so every `HashMap` with keys being/containing addresses will amplify
+//!       ASLR and ASLR-like effects, making the entire program more sensitive
+//!     * the default hasher is randomized, and while `rustc` doesn't use it,
+//!       proc macros can (and will), and it's harder to disable than Linux ASLR
+//!   * `jemalloc` (the allocator used by `rustc`, at least in official releases)
+//!     has a 10 second "purge timer", which can introduce an ASLR-like effect,
+//!     unless disabled with `MALLOC_CONF=dirty_decay_ms:0,muzzy_decay_ms:0`
+//! * hardware flaws (whether in the design or implementation)
+//!   * hardware interrupts ("IRQs") and exceptions (like page faults) cause
+//!     overcounting (1 instruction per interrupt, possibly the `iret` from the
+//!     kernel handler back to the interrupted userspace program)
+//!     * this is the reason why `instructions-minus-irqs:u` should be preferred
+//!       to `instructions:u`, where the former is available
+//!     * there are system-wide options (e.g. `CONFIG_NO_HZ_FULL`) for removing
+//!       some interrupts from the cores used for profiling, but they're not as
+//!       complete of a solution, nor easy to set up in the first place
+//!   * AMD Zen CPUs have a speculative execution feature (dubbed `SpecLockMap`),
+//!     which can cause non-deterministic overcounting for instructions following
+//!     an atomic instruction (such as found in heap allocators, or `measureme`)
+//!     * this is automatically detected, with a `log` message pointing the user
+//!       to <https://github.com/mozilla/rr/wiki/Zen> for guidance on how to
+//!       disable `SpecLockMap` on their system (sadly requires root access)
+//!
+//! Even if some of the above caveats apply for some profiling setup, as long as
+//! the counters function, they can still be used, and compared with `wall-time`.
+//! Chances are, they will still have less variance, as everything that impacts
+//! instruction counts will also impact any time measurements.
+//!
+//! Also keep in mind that instruction counts do not properly reflect all kinds
+//! of workloads, e.g. SIMD throughput and cache locality are unaccounted for.
+
+use std::error::Error;
+use std::time::Instant;
+
+// HACK(eddyb) this is semantically `warn!` but uses `error!` because
+// that's the only log level enabled by default - see also
+// https://github.com/rust-lang/rust/issues/76824
+macro_rules! really_warn {
+    ($msg:literal $($rest:tt)*) => {
+        error!(concat!("[WARNING] ", $msg) $($rest)*)
+    }
+}
+
+pub enum Counter {
+    WallTime(WallTime),
+    Instructions(Instructions),
+    InstructionsMinusIrqs(InstructionsMinusIrqs),
+    InstructionsMinusRaw0420(InstructionsMinusRaw0420),
+}
+
+impl Counter {
+    pub fn by_name(name: &str) -> Result<Self, Box<dyn Error + Send + Sync>> {
+        Ok(match name {
+            WallTime::NAME => Counter::WallTime(WallTime::new()),
+            Instructions::NAME => Counter::Instructions(Instructions::new()?),
+            InstructionsMinusIrqs::NAME => {
+                Counter::InstructionsMinusIrqs(InstructionsMinusIrqs::new()?)
+            }
+            InstructionsMinusRaw0420::NAME => {
+                Counter::InstructionsMinusRaw0420(InstructionsMinusRaw0420::new()?)
+            }
+            _ => return Err(format!("{:?} is not a valid counter name", name).into()),
+        })
+    }
+
+    pub(super) fn describe_as_json(&self) -> String {
+        let (name, units) = match self {
+            Counter::WallTime(_) => (
+                WallTime::NAME,
+                r#"[["ns", 1], ["μs", 1000], ["ms", 1000000], ["s", 1000000000]]"#,
+            ),
+            Counter::Instructions(_) => (Instructions::NAME, r#"[["instructions", 1]]"#),
+            Counter::InstructionsMinusIrqs(_) => {
+                (InstructionsMinusIrqs::NAME, r#"[["instructions", 1]]"#)
+            }
+            Counter::InstructionsMinusRaw0420(_) => {
+                (InstructionsMinusRaw0420::NAME, r#"[["instructions", 1]]"#)
+            }
+        };
+        format!(r#"{{ "name": "{}", "units": {} }}"#, name, units)
+    }
+
+    #[inline]
+    pub(super) fn since_start(&self) -> u64 {
+        match self {
+            Counter::WallTime(counter) => counter.since_start(),
+            Counter::Instructions(counter) => counter.since_start(),
+            Counter::InstructionsMinusIrqs(counter) => counter.since_start(),
+            Counter::InstructionsMinusRaw0420(counter) => counter.since_start(),
+        }
+    }
+}
+
+/// "Monotonic clock" with nanosecond precision (using [`std::time::Instant`]).
+///
+/// Can be obtained with `Counter::by_name("wall-time")`.
+pub struct WallTime {
+    start: Instant,
+}
+
+impl WallTime {
+    const NAME: &'static str = "wall-time";
+
+    pub fn new() -> Self {
+        WallTime {
+            start: Instant::now(),
+        }
+    }
+
+    #[inline]
+    fn since_start(&self) -> u64 {
+        self.start.elapsed().as_nanos() as u64
+    }
+}
+
+/// "Instructions retired" hardware performance counter (userspace-only).
+///
+/// Can be obtained with `Counter::by_name("instructions:u")`.
+pub struct Instructions {
+    instructions: hw::Counter,
+    start: u64,
+}
+
+impl Instructions {
+    const NAME: &'static str = "instructions:u";
+
+    pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
+        let model = hw::CpuModel::detect()?;
+        let instructions = hw::Counter::new(&model, HwCounterType::Instructions)?;
+        let start = instructions.read();
+        Ok(Instructions {
+            instructions,
+            start,
+        })
+    }
+
+    #[inline]
+    fn since_start(&self) -> u64 {
+        self.instructions.read().wrapping_sub(self.start)
+    }
+}
+
+/// More accurate [`Instructions`] (subtracting hardware interrupt counts).
+///
+/// Can be obtained with `Counter::by_name("instructions-minus-irqs:u")`.
+pub struct InstructionsMinusIrqs {
+    instructions: hw::Counter,
+    irqs: hw::Counter,
+    start: u64,
+}
+
+impl InstructionsMinusIrqs {
+    const NAME: &'static str = "instructions-minus-irqs:u";
+
+    pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
+        let model = hw::CpuModel::detect()?;
+        let instructions = hw::Counter::new(&model, HwCounterType::Instructions)?;
+        let irqs = hw::Counter::new(&model, HwCounterType::Irqs)?;
+        let (start_instructions, start_irqs) = (&instructions, &irqs).read();
+        let start = start_instructions.wrapping_sub(start_irqs);
+        Ok(InstructionsMinusIrqs {
+            instructions,
+            irqs,
+            start,
+        })
+    }
+
+    #[inline]
+    fn since_start(&self) -> u64 {
+        let (instructions, irqs) = (&self.instructions, &self.irqs).read();
+        instructions.wrapping_sub(irqs).wrapping_sub(self.start)
+    }
+}
+
+/// (Experimental) Like [`InstructionsMinusIrqs`] (but using an undocumented `r0420:u` counter).
+///
+/// Can be obtained with `Counter::by_name("instructions-minus-r0420:u")`.
+//
+// HACK(eddyb) this is a variant of `instructions-minus-irqs:u`, where `r0420`
+// is subtracted, instead of the usual "hardware interrupts" (aka IRQs).
+// `r0420` is an undocumented counter on AMD Zen CPUs which appears to count
+// both hardware interrupts and exceptions (such as page faults), though
+// it's unclear yet what exactly it's counting (could even be `iret`s).
+pub struct InstructionsMinusRaw0420(InstructionsMinusIrqs);
+
+impl InstructionsMinusRaw0420 {
+    const NAME: &'static str = "instructions-minus-r0420:u";
+
+    pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
+        let model = hw::CpuModel::detect()?;
+        let instructions = hw::Counter::new(&model, HwCounterType::Instructions)?;
+        let irqs = hw::Counter::new(&model, HwCounterType::Raw0420)?;
+        let (start_instructions, start_irqs) = (&instructions, &irqs).read();
+        let start = start_instructions.wrapping_sub(start_irqs);
+        Ok(InstructionsMinusRaw0420(InstructionsMinusIrqs {
+            instructions,
+            irqs,
+            start,
+        }))
+    }
+
+    #[inline]
+    fn since_start(&self) -> u64 {
+        self.0.since_start()
+    }
+}
+
+trait HwCounterRead {
+    type Output;
+    fn read(&self) -> Self::Output;
+}
+
+enum HwCounterType {
+    Instructions,
+    Irqs,
+    Raw0420,
+}
+
+const BUG_REPORT_MSG: &str =
+    "please report this to https://github.com/rust-lang/measureme/issues/new";
+
+/// Linux x86_64 implementation based on `perf_event_open` and `rdpmc`.
+#[cfg(all(feature = "nightly", target_arch = "x86_64", target_os = "linux"))]
+mod hw {
+    use memmap::{Mmap, MmapOptions};
+    use perf_event_open_sys::{bindings::*, perf_event_open};
+    use std::convert::TryInto;
+    use std::error::Error;
+    use std::fs;
+    use std::mem;
+    use std::os::unix::io::FromRawFd;
+
+    pub(super) struct Counter {
+        mmap: Mmap,
+        reg_idx: u32,
+    }
+
+    impl Counter {
+        pub(super) fn new(
+            model: &CpuModel,
+            counter_type: super::HwCounterType,
+        ) -> Result<Self, Box<dyn Error + Send + Sync>> {
+            let (type_, hw_id) = match counter_type {
+                super::HwCounterType::Instructions => (
+                    perf_type_id_PERF_TYPE_HARDWARE,
+                    perf_hw_id_PERF_COUNT_HW_INSTRUCTIONS,
+                ),
+                super::HwCounterType::Irqs => {
+                    (perf_type_id_PERF_TYPE_RAW, model.irqs_counter_config()?)
+                }
+                super::HwCounterType::Raw0420 => {
+                    match model {
+                        CpuModel::Amd(AmdGen::Zen) => {}
+
+                        _ => really_warn!(
+                            "Counter::new: the undocumented `r0420` performance \
+                             counter has only been observed on AMD Zen CPUs"
+                        ),
+                    }
+
+                    (perf_type_id_PERF_TYPE_RAW, 0x04_20)
+                }
+            };
+            Self::with_type_and_hw_id(type_, hw_id)
+        }
+
+        fn with_type_and_hw_id(
+            type_: perf_type_id,
+            hw_id: u32,
+        ) -> Result<Self, Box<dyn Error + Send + Sync>> {
+            let mut attrs = perf_event_attr {
+                size: mem::size_of::<perf_event_attr>().try_into().unwrap(),
+                type_,
+                config: hw_id.into(),
+                ..perf_event_attr::default()
+            };
+
+            // Only record same-thread, any CPUs, and only userspace (no kernel/hypervisor).
+            // NOTE(eddyb) `pid = 0`, despite talking about "process id", means
+            // "calling process/thread", *not* "any thread in the calling process"
+            // (i.e. "process" is interchangeable with "main thread of the process")
+            // FIXME(eddyb) introduce per-thread counters and/or use `inherit`
+            // (and `inherit_stat`? though they might not be appropriate here)
+            // to be able to read the counter on more than just the initial thread.
+            let pid = 0;
+            let cpu = -1;
+            let group_fd = -1;
+            attrs.set_exclude_kernel(1);
+            attrs.set_exclude_hv(1);
+
+            let file = unsafe {
+                let fd =
+                    perf_event_open(&mut attrs, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC.into());
+                if fd < 0 {
+                    Err(std::io::Error::from_raw_os_error(-fd))
+                } else {
+                    Ok(fs::File::from_raw_fd(fd))
+                }
+            };
+            let file = file.map_err(|e| format!("perf_event_open failed: {:?}", e))?;
+
+            let mmap = unsafe {
+                MmapOptions::new()
+                    .len(mem::size_of::<perf_event_mmap_page>())
+                    .map(&file)
+            };
+            let mmap = mmap.map_err(|e| format!("perf_event_mmap_page: mmap failed: {:?}", e))?;
+
+            let mut counter = Counter { mmap, reg_idx: 0 };
+
+            let (version, compat_version, caps, index, pmc_width) = counter
+                .access_mmap_page_with_seqlock(|mp| {
+                    (
+                        mp.version,
+                        mp.compat_version,
+                        unsafe { mp.__bindgen_anon_1.__bindgen_anon_1 },
+                        mp.index,
+                        mp.pmc_width,
+                    )
+                });
+
+            info!(
+                "Counter::new: version={} compat_version={} index={:#x}",
+                version, compat_version, index,
+            );
+
+            if caps.cap_user_rdpmc() == 0 {
+                return Err(format!(
+                    "perf_event_mmap_page: missing cap_user_rdpmc{}",
+                    if caps.cap_bit0_is_deprecated() == 0 && caps.cap_bit0() == 1 {
+                        " (ignoring legacy/broken rdpmc support)"
+                    } else {
+                        ""
+                    }
+                )
+                .into());
+            }
+
+            if index == 0 {
+                return Err(format!(
+                    "perf_event_mmap_page: no allocated hardware register (ran out?)"
+                )
+                .into());
+            }
+            counter.reg_idx = index - 1;
+
+            if (cfg!(not(accurate_seqlock_rdpmc)) || true) && pmc_width != 48 {
+                return Err(format!(
+                    "perf_event_mmap_page: {}-bit hardware counter found, only 48-bit supported",
+                    pmc_width
+                )
+                .into());
+            }
+
+            Ok(counter)
+        }
+
+        /// Try to access the mmap page, retrying the `attempt` closure as long
+        /// as the "seqlock" sequence number changes (which indicates the kernel
+        /// has updated one or more fields within the mmap page).
+        #[inline]
+        fn access_mmap_page_with_seqlock<T>(
+            &self,
+            attempt: impl Fn(&perf_event_mmap_page) -> T,
+        ) -> T {
+            // FIXME(eddyb) it's probably UB to use regular reads, especially
+            // from behind `&T`, with the only synchronization being barriers.
+            // Probably needs atomic reads, and stronger ones at that, for the
+            // `lock` field, than the fields (which would be `Relaxed`?).
+            let mmap_page = unsafe { &*(self.mmap.as_ptr() as *const perf_event_mmap_page) };
+            let barrier = || std::sync::atomic::fence(std::sync::atomic::Ordering::Acquire);
+
+            loop {
+                // Grab the "seqlock" - the kernel will update this value when it
+                // updates any of the other fields that may be read in `attempt`.
+                let seq_lock = mmap_page.lock;
+                barrier();
+
+                let result = attempt(mmap_page);
+
+                // If nothing has changed, we're done. Otherwise, keep retrying.
+                barrier();
+                if mmap_page.lock == seq_lock {
+                    return result;
+                }
+            }
+        }
+    }
+
+    impl super::HwCounterRead for Counter {
+        type Output = u64;
+
+        #[inline]
+        fn read(&self) -> u64 {
+            // HACK(eddyb) keep the accurate code around while not using it,
+            // to minimize overhead without losing the more complex implementation.
+            let (counter, offset, pmc_width) = if cfg!(accurate_seqlock_rdpmc) && false {
+                self.access_mmap_page_with_seqlock(|mp| {
+                    let caps = unsafe { mp.__bindgen_anon_1.__bindgen_anon_1 };
+                    assert_ne!(caps.cap_user_rdpmc(), 0);
+
+                    (
+                        rdpmc(mp.index.checked_sub(1).unwrap()),
+                        mp.offset,
+                        mp.pmc_width,
+                    )
+                })
+            } else {
+                (rdpmc(self.reg_idx), 0, 48)
+            };
+
+            let counter = offset + (counter as i64);
+
+            // Sign-extend the `pmc_width`-bit value to `i64`.
+            (counter << (64 - pmc_width) >> (64 - pmc_width)) as u64
+        }
+    }
+
+    impl super::HwCounterRead for (&Counter, &Counter) {
+        type Output = (u64, u64);
+
+        #[inline]
+        fn read(&self) -> (u64, u64) {
+            // HACK(eddyb) keep the accurate code around while not using it,
+            // to minimize overhead without losing the more complex implementation.
+            if (cfg!(accurate_seqlock_rdpmc) || cfg!(unserialized_rdpmc)) && false {
+                return (self.0.read(), self.1.read());
+            }
+
+            let pmc_width = 48;
+
+            let (a_counter, b_counter) = rdpmc_pair(self.0.reg_idx, self.1.reg_idx);
+
+            // Sign-extend the `pmc_width`-bit values to `i64`.
+            (
+                ((a_counter as i64) << (64 - pmc_width) >> (64 - pmc_width)) as u64,
+                ((b_counter as i64) << (64 - pmc_width) >> (64 - pmc_width)) as u64,
+            )
+        }
+    }
+
+    /// Read the hardware performance counter indicated by `reg_idx`.
+    ///
+    /// If the counter is signed, sign extension should be performed based on
+    /// the width of the register (32 to 64 bits, e.g. 48-bit seems common).
+    #[inline(always)]
+    fn rdpmc(reg_idx: u32) -> u64 {
+        let (lo, hi): (u32, u32);
+        unsafe {
+            // NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`).
+            if cfg!(unserialized_rdpmc) && false {
+                // FIXME(eddyb) the Intel and AMD manuals warn about the need for
+                // "serializing instructions" before/after `rdpmc`, if avoiding any
+                // reordering is desired, but do not agree on the full set of usable
+                // "serializing instructions" (e.g. `mfence` isn't listed in both).
+                //
+                // The only usable, and guaranteed to work, "serializing instruction"
+                // appears to be `cpuid`, but it doesn't seem easy to use, especially
+                // due to the overlap in registers with `rdpmc` itself, and it might
+                // have too high of a cost, compared to serialization benefits (if any).
+                asm!("rdpmc", in("ecx") reg_idx, out("eax") lo, out("edx") hi, options(nostack));
+            } else {
+                asm!(
+                    // Dummy `cpuid(0)` to serialize instruction execution.
+                    "xor eax, eax",
+                    "cpuid",
+
+                    "mov ecx, {rdpmc_ecx:e}",
+                    "rdpmc",
+                    rdpmc_ecx = in(reg) reg_idx,
+                    out("eax") lo,
+                    out("edx") hi,
+
+                    // `cpuid` clobbers (not overwritten by `rdpmc`).
+                    out("ebx") _,
+                    out("ecx") _,
+
+                    options(nostack),
+                );
+            }
+        }
+        lo as u64 | (hi as u64) << 32
+    }
+
+    /// Read two hardware performance counters at once (see `rdpmc`).
+    ///
+    /// Should be more efficient/accurate than two `rdpmc` calls, as it
+    /// only requires one "serializing instruction", rather than two.
+    #[inline(always)]
+    fn rdpmc_pair(a_reg_idx: u32, b_reg_idx: u32) -> (u64, u64) {
+        let (a_lo, a_hi): (u32, u32);
+        let (b_lo, b_hi): (u32, u32);
+        unsafe {
+            asm!(
+                // Dummy `cpuid(0)` to serialize instruction execution.
+                "xor eax, eax",
+                "cpuid",
+
+                "mov ecx, {a_rdpmc_ecx:e}",
+                "rdpmc",
+                "mov {a_rdpmc_eax:e}, eax",
+                "mov {a_rdpmc_edx:e}, edx",
+                "mov ecx, {b_rdpmc_ecx:e}",
+                "rdpmc",
+                a_rdpmc_ecx = in(reg) a_reg_idx,
+                a_rdpmc_eax = out(reg) a_lo,
+                a_rdpmc_edx = out(reg) a_hi,
+                b_rdpmc_ecx = in(reg) b_reg_idx,
+                out("eax") b_lo,
+                out("edx") b_hi,
+
+                // `cpuid` clobbers (not overwritten by `rdpmc`).
+                out("ebx") _,
+                out("ecx") _,
+
+                options(nostack),
+            );
+        }
+        (
+            a_lo as u64 | (a_hi as u64) << 32,
+            b_lo as u64 | (b_hi as u64) << 32,
+        )
+    }
+
+    /// Categorization of `x86_64` CPUs, primarily based on how they
+    /// support for counting "hardware interrupts" (documented or not).
+    pub(super) enum CpuModel {
+        Amd(AmdGen),
+        Intel(IntelGen),
+    }
+
+    pub(super) enum AmdGen {
+        /// K8 (Hammer) to Jaguar / Puma.
+        PreZen,
+
+        /// Zen / Zen+ / Zen 2.
+        Zen,
+
+        /// Unknown AMD CPU, contemporary to/succeeding Zen/Zen+/Zen 2,
+        /// but likely similar to them.
+        UnknownMaybeZenLike,
+    }
+
+    pub(super) enum IntelGen {
+        /// Intel CPU predating Sandy Bridge. These are the only CPUs we
+        /// can't support (more) accurate instruction counting on, as they
+        /// don't (appear to) have any way to count "hardware interrupts".
+        PreBridge,
+
+        /// Sandy Bridge / Ivy Bridge:
+        /// * client: Sandy Bridge (M/H) / Ivy Bridge (M/H/Gladden)
+        /// * server: Sandy Bridge (E/EN/EP) / Ivy Bridge (E/EN/EP/EX)
+        ///
+        /// Intel doesn't document support for counting "hardware interrupts"
+        /// prior to Skylake, but testing found that `HW_INTERRUPTS.RECEIVED`
+        /// from Skylake has existed, with the same config, as far back as
+        /// "Sandy Bridge" (but before that it mapped to a different event).
+        ///
+        /// These are the (pre-Skylake) *Bridge CPU models confirmed so far:
+        /// * Sandy Bridge (client) Family 6 Model 42
+        ///     Intel(R) Core(TM) i5-2520M CPU @ 2.50GHz (@alyssais)
+        /// * Ivy Bridge (client) Family 6 Model 58
+        ///     Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz (@eddyb)
+        ///
+        /// We later found this paper, which on page 5 lists 12 counters,
+        /// for each of Nehalem/Westmere, Sandy Bridge and Ivy Bridge:
+        /// http://web.eece.maine.edu/~vweaver/projects/deterministic/deterministic_counters.pdf
+        /// It appears that both Sandy Bridge and Ivy Bridge used to have
+        /// `HW_INTERRUPTS.RECEIVED` documented, before Intel removed every
+        /// mention of the counter from newer versions of their manuals.
+        Bridge,
+
+        /// Haswell / Broadwell:
+        /// * client: Haswell (S/ULT/GT3e) / Broadwell (U/Y/S/H/C/W)
+        /// * server: Haswell (E/EP/EX) / Broadwell (E/EP/EX/DE/Hewitt Lake)
+        ///
+        /// Equally as undocumented as "Sandy Bridge / Ivy Bridge" (see above).
+        ///
+        /// These are the (pre-Skylake) *Well CPU models confirmed so far:
+        /// * Haswell (client) Family 6 Model 60
+        ///     Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz (@m-ou-se)
+        /// * Haswell (server) Family 6 Model 63
+        ///     Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz (@cuviper)
+        /// * Haswell (client + GT3e) Family 6 Model 70
+        ///     Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz (@nagisa)
+        ///     Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz (@m-ou-se)
+        Well,
+
+        /// Skylake / Skylake-derived:
+        /// * client: Skylake (Y/U/DT/H/S) / Kaby Lake (Y/U/DT/H/S/X) / Coffee Lake (U/S/H/E)
+        /// * server: Skylake (SP/X/DE/W) / Cascade Lake (SP/X/W)
+        ///
+        /// Both "client" and "server" product lines have documented support
+        /// for counting "hardware interrupts" (`HW_INTERRUPTS.RECEIVED`).
+        ///
+        /// Intel does not make it clear that future product lines, such as
+        /// "Ice Lake", will continue to support this (or with what config),
+        /// and even "Comet Lake" (aka "10th gen") isn't explicitly listed.
+        Lake,
+
+        /// Unknown Intel CPU, contemporary to/succeeding *Bridge/*Well/*Lake,
+        /// but likely similar to them.
+        UnknownMaybeLakeLike,
+    }
+
+    impl CpuModel {
+        /// Detect the model of the current CPU using `cpuid`.
+        pub(super) fn detect() -> Result<Self, Box<dyn Error + Send + Sync>> {
+            let cpuid0 = unsafe { std::arch::x86_64::__cpuid(0) };
+            let cpuid1 = unsafe { std::arch::x86_64::__cpuid(1) };
+            let mut vendor = [0; 12];
+            vendor[0..4].copy_from_slice(&cpuid0.ebx.to_le_bytes());
+            vendor[4..8].copy_from_slice(&cpuid0.edx.to_le_bytes());
+            vendor[8..12].copy_from_slice(&cpuid0.ecx.to_le_bytes());
+
+            let vendor = std::str::from_utf8(&vendor).map_err(|_| {
+                format!(
+                    "cpuid returned non-UTF-8 vendor name: cpuid(0)={:?} cpuid(1)={:?}",
+                    cpuid0, cpuid1
+                )
+            })?;
+
+            let version = cpuid1.eax;
+
+            let mut family = (version >> 8) & 0xf;
+            if family == 15 {
+                // Extended family.
+                family += (version >> 20) & 0xff;
+            }
+
+            let mut model = (version >> 4) & 0xf;
+            if family >= 15 || vendor == "GenuineIntel" && family == 6 {
+                // Extended model.
+                model += ((version >> 16) & 0xf) << 4;
+            }
+
+            info!(
+                "CpuModel::detect: vendor={:?} family={} model={}",
+                vendor, family, model
+            );
+
+            match vendor {
+                "AuthenticAMD" => {
+                    use self::AmdGen::*;
+
+                    let (gen, name) = match (family, model) {
+                        (0..=14, _) | (19, _) => {
+                            return Err(format!(
+                                "impossible AMD64 CPU detected (Family {} Model {}); {}",
+                                family,
+                                model,
+                                super::BUG_REPORT_MSG
+                            )
+                            .into());
+                        }
+
+                        (15, _) => (PreZen, "K8 (Hammer)"),
+                        (16, _) => (PreZen, "K10 (Barcelona/Shanghai/Istanbul)"),
+                        (17, _) => (PreZen, "K8+K10 hybrid (Turion X2 Ultra)"),
+                        (18, _) => (PreZen, "Fusion"),
+                        (20, _) => (PreZen, "Bobcat"),
+                        (21, _) => (PreZen, "Bulldozer / Piledriver / Steamroller / Excavator"),
+                        (22, _) => (PreZen, "Jaguar / Puma"),
+
+                        (23, 1) => (Zen, "Zen (Naples/Whitehaven/Summit Ridge/Snowy Owl)"),
+                        (23, 17) => (Zen, "Zen (Raven Ridge)"),
+                        (23, 24) => (Zen, "Zen (Banded Kestrel/Dali) / Zen+ (Picasso)"),
+                        (23, 8) => (Zen, "Zen+ (Pinnacle Ridge)"),
+                        (23, 49) => (Zen, "Zen 2 (Rome/Castle Peak)"),
+                        (23, 113) => (Zen, "Zen 2 (Matisse)"),
+
+                        (23..=0xffff_ffff, _) => {
+                            really_warn!(
+                                "CpuModel::detect: unknown AMD CPU (Family {} Model {}), \
+                                 assuming Zen-like; {}",
+                                family,
+                                model,
+                                super::BUG_REPORT_MSG
+                            );
+
+                            (UnknownMaybeZenLike, "")
+                        }
+                    };
+
+                    if !name.is_empty() {
+                        info!("CpuModel::detect: known AMD CPU: {}", name);
+                    }
+
+                    // The `SpecLockMap` (speculative atomic aka `lock` instruction
+                    // execution, unclear what "Map" refers to) feature in AMD Zen CPUs
+                    // causes non-deterministic overcounting of atomic instructions,
+                    // presumably whenever it has to roll back the speculation
+                    // (as in, the performance counters aren't rolled back).
+                    // Even this this may be rare when uncontended, it adds up.
+                    //
+                    // There is an MSR bit (`MSRC001_1020[54]`) that's not officially
+                    // documented, but which several motherboards and profiling tools
+                    // set whenever IBS (Instruction-Based Sampling) is in use, and
+                    // it is sometimes referred to as "disabling `SpecLockMap`"
+                    // (hence having a name for the feature that speculates `lock`s).
+                    //
+                    // One way we could detect that the bit has been set would be to
+                    // parse `uname().release` (aka `uname -r`) and look for versions
+                    // which are known to include the patch suggested in this thread:
+                    // https://github.com/mozilla/rr/issues/2034#issuecomment-693761247
+                    //
+                    // However, one may set the bit using e.g. `wrmsr`, even on older
+                    // kernels, so a more reliable approach is to execute some atomics
+                    // and look at the `SpecLockMapCommit` (`r0825:u`) Zen counter,
+                    // which only reliably remains `0` when `SpecLockMap` is disabled.
+                    if matches!(gen, Zen | UnknownMaybeZenLike) {
+                        if let Ok(spec_lock_map_commit) =
+                            Counter::with_type_and_hw_id(perf_type_id_PERF_TYPE_RAW, 0x08_25)
+                        {
+                            use super::HwCounterRead;
+
+                            let start_spec_lock_map_commit = spec_lock_map_commit.read();
+
+                            // Execute an atomic (`lock`) instruction, which should
+                            // start speculative execution for following instructions
+                            // (as long as `SpecLockMap` isn't disabled).
+                            let mut atomic: u64 = 0;
+                            let mut _tmp: u64 = 0;
+                            unsafe {
+                                asm!(
+                                    "lock xadd qword ptr [{atomic}], {tmp}",
+
+                                    atomic = in(reg) &mut atomic,
+                                    tmp = inout(reg) _tmp,
+                                );
+                            }
+
+                            if spec_lock_map_commit.read() != start_spec_lock_map_commit {
+                                really_warn!(
+                                    "CpuModel::detect: SpecLockMap detected, in AMD {} CPU; \
+                                     this may add some non-deterministic noise - \
+                                     for information on disabling SpecLockMap, see \
+                                     https://github.com/mozilla/rr/wiki/Zen",
+                                    name
+                                );
+                            }
+                        }
+                    }
+
+                    Ok(CpuModel::Amd(gen))
+                }
+
+                "GenuineIntel" => {
+                    use self::IntelGen::*;
+
+                    let (gen, name) = match (family, model) {
+                        // No need to name these, they're unsupported anyway.
+                        (0..=5, _) => (PreBridge, ""),
+                        (15, _) => (PreBridge, "Netburst"),
+                        (6, 0..=41) => (PreBridge, ""),
+
+                        // Older Xeon Phi CPUs, misplaced in Family 6.
+                        (6, 87) => (PreBridge, "Knights Landing"),
+                        (6, 133) => (PreBridge, "Knights Mill"),
+
+                        // Older Atom CPUs, interleaved with other CPUs.
+                        // FIXME(eddyb) figure out if these are like *Bridge/*Well.
+                        (6, 53) | (6, 54) => (PreBridge, "Saltwell"),
+                        (6, 55) | (6, 74) | (6, 77) | (6, 90) | (6, 93) => {
+                            (PreBridge, "Silvermont")
+                        }
+                        (6, 76) => (PreBridge, "Airmont (Cherry Trail/Braswell)"),
+
+                        // Older server CPUs, numbered out of order.
+                        (6, 44) => (PreBridge, "Westmere (Gulftown/EP)"),
+                        (6, 46) => (PreBridge, "Nehalem (EX)"),
+                        (6, 47) => (PreBridge, "Westmere (EX)"),
+
+                        (6, 42) => (Bridge, "Sandy Bridge (M/H)"),
+                        (6, 45) => (Bridge, "Sandy Bridge (E/EN/EP)"),
+                        (6, 58) => (Bridge, "Ivy Bridge (M/H/Gladden)"),
+                        (6, 62) => (Bridge, "Ivy Bridge (E/EN/EP/EX)"),
+
+                        (6, 60) => (Well, "Haswell (S)"),
+                        (6, 61) => (Well, "Broadwell (U/Y/S)"),
+                        (6, 63) => (Well, "Haswell (E/EP/EX)"),
+                        (6, 69) => (Well, "Haswell (ULT)"),
+                        (6, 70) => (Well, "Haswell (GT3e)"),
+                        (6, 71) => (Well, "Broadwell (H/C/W)"),
+                        (6, 79) => (Well, "Broadwell (E/EP/EX)"),
+                        (6, 86) => (Well, "Broadwell (DE/Hewitt Lake)"),
+
+                        (6, 78) => (Lake, "Skylake (Y/U)"),
+                        (6, 85) => (Lake, "Skylake (SP/X/DE/W) / Cascade Lake (SP/X/W)"),
+                        (6, 94) => (Lake, "Skylake (DT/H/S)"),
+                        (6, 142) => (Lake, "Kaby Lake (Y/U) / Coffee Lake (U)"),
+                        (6, 158) => (Lake, "Kaby Lake (DT/H/S/X) / Coffee Lake (S/H/E)"),
+
+                        (6..=14, _) | (16..=0xffff_ffff, _) => {
+                            really_warn!(
+                                "CpuModel::detect: unknown Intel CPU (Family {} Model {}), \
+                                 assuming Skylake-like; {}",
+                                family,
+                                model,
+                                super::BUG_REPORT_MSG
+                            );
+
+                            (UnknownMaybeLakeLike, "")
+                        }
+                    };
+
+                    if !name.is_empty() {
+                        info!("CpuModel::detect: known Intel CPU: {}", name);
+                    }
+
+                    Ok(CpuModel::Intel(gen))
+                }
+
+                _ => Err(format!(
+                    "cpuid returned unknown CPU vendor {:?}; version={:#x}",
+                    vendor, version
+                )
+                .into()),
+            }
+        }
+
+        /// Return the hardware performance counter configuration for
+        /// counting "hardware interrupts" (documented or not).
+        fn irqs_counter_config(&self) -> Result<u32, Box<dyn Error + Send + Sync>> {
+            match self {
+                CpuModel::Amd(model) => match model {
+                    AmdGen::PreZen => Ok(0x00_cf),
+                    AmdGen::Zen | AmdGen::UnknownMaybeZenLike => Ok(0x00_2c),
+                },
+                CpuModel::Intel(model) => match model {
+                    IntelGen::PreBridge => Err(format!(
+                        "counting IRQs not yet supported on Intel CPUs \
+                         predating Sandy Bridge; {}",
+                        super::BUG_REPORT_MSG
+                    )
+                    .into()),
+                    IntelGen::Bridge
+                    | IntelGen::Well
+                    | IntelGen::Lake
+                    | IntelGen::UnknownMaybeLakeLike => Ok(0x01_cb),
+                },
+            }
+        }
+    }
+}
+
+#[cfg(not(all(feature = "nightly", target_arch = "x86_64", target_os = "linux")))]
+mod hw {
+    use std::error::Error;
+
+    pub(super) enum Counter {}
+
+    impl Counter {
+        pub(super) fn new(
+            model: &CpuModel,
+            _: super::HwCounterType,
+        ) -> Result<Self, Box<dyn Error + Send + Sync>> {
+            match *model {}
+        }
+    }
+
+    impl super::HwCounterRead for Counter {
+        type Output = u64;
+
+        #[inline]
+        fn read(&self) -> u64 {
+            match *self {}
+        }
+    }
+
+    impl super::HwCounterRead for (&Counter, &Counter) {
+        type Output = (u64, u64);
+
+        #[inline]
+        fn read(&self) -> (u64, u64) {
+            match *self.0 {}
+        }
+    }
+
+    pub(super) enum CpuModel {}
+
+    impl CpuModel {
+        pub(super) fn detect() -> Result<Self, Box<dyn Error + Send + Sync>> {
+            // HACK(eddyb) mark `really_warn!` (and transitively `log` macros)
+            // and `BUG_REPORT_MSG` as "used" to silence warnings.
+            if false {
+                really_warn!("unsupported; {}", super::BUG_REPORT_MSG);
+            }
+
+            let mut msg = String::new();
+            let mut add_error = |s| {
+                if !msg.is_empty() {
+                    msg += "; ";
+                }
+                msg += s;
+            };
+
+            if cfg!(not(feature = "nightly")) {
+                add_error("only supported with measureme's \"nightly\" feature");
+            }
+
+            if cfg!(not(target_arch = "x86_64")) {
+                add_error("only supported architecture is x86_64");
+            }
+
+            if cfg!(not(target_os = "linux")) {
+                add_error("only supported OS is Linux");
+            }
+
+            Err(msg.into())
+        }
+    }
+}
diff --git a/measureme/src/lib.rs b/measureme/src/lib.rs
index 30c8674..446c90a 100644
--- a/measureme/src/lib.rs
+++ b/measureme/src/lib.rs
@@ -12,6 +12,10 @@
 //!
 //! To create a [`Profiler`], call the [`Profiler::new()`] function and provide a `Path` with
 //! the directory and file name for the trace files.
+//! Alternatively, call the [`Profiler::with_counter()`] function, to choose the [`Counter`]
+//! the profiler will use for events (whereas [`Profiler::new()`] defaults to `wall-time`).
+//!
+//! For more information on available counters, see the [`counters`] module documentation.
 //!
 //! To record an event, call the [`Profiler::record_instant_event()`] method, passing a few
 //! arguments:
@@ -28,16 +32,24 @@
 //!   - [`Profiler::alloc_string()`]: allocates a string and returns the [`StringId`] that refers
 //!     to it
 //!
+//! [`counters`]: counters
+//! [`Counter`]: counters::Counter
 //! [`Profiler`]: Profiler
 //! [`Profiler::alloc_string()`]: Profiler::alloc_string
 //! [`Profiler::alloc_string_with_reserved_id()`]: Profiler::alloc_string_with_reserved_id
 //! [`Profiler::new()`]: Profiler::new
 //! [`Profiler::record_event()`]: Profiler::record_event
 //! [`Profiler::start_recording_interval_event()`]: Profiler::start_recording_interval_event
+//! [`Profiler::with_counter()`]: Profiler::with_counter
 //! [`StringId`]: StringId
 #![allow(renamed_and_removed_lints)] // intra_doc_link_resolution_failure is renamed on nightly
 #![deny(warnings, intra_doc_link_resolution_failure)]
+#![cfg_attr(feature = "nightly", feature(asm))]
+
+#[macro_use]
+extern crate log;
 
+pub mod counters;
 pub mod event_id;
 pub mod file_header;
 mod profiler;
diff --git a/measureme/src/profiler.rs b/measureme/src/profiler.rs
index c897c2b..f4dd3bb 100644
--- a/measureme/src/profiler.rs
+++ b/measureme/src/profiler.rs
@@ -1,3 +1,4 @@
+use crate::counters::Counter;
 use crate::file_header::{write_file_header, FILE_MAGIC_EVENT_STREAM, FILE_MAGIC_TOP_LEVEL};
 use crate::raw_event::RawEvent;
 use crate::serialization::{PageTag, SerializationSink, SerializationSinkBuilder};
@@ -7,16 +8,25 @@ use std::error::Error;
 use std::fs;
 use std::path::Path;
 use std::sync::Arc;
-use std::time::Instant;
 
 pub struct Profiler {
     event_sink: Arc<SerializationSink>,
     string_table: StringTableBuilder,
-    start_time: Instant,
+    counter: Counter,
 }
 
 impl Profiler {
     pub fn new<P: AsRef<Path>>(path_stem: P) -> Result<Profiler, Box<dyn Error + Send + Sync>> {
+        Self::with_counter(
+            path_stem,
+            Counter::WallTime(crate::counters::WallTime::new()),
+        )
+    }
+
+    pub fn with_counter<P: AsRef<Path>>(
+        path_stem: P,
+        counter: Counter,
+    ) -> Result<Profiler, Box<dyn Error + Send + Sync>> {
         let path = path_stem.as_ref().with_extension(FILE_EXTENSION);
 
         fs::create_dir_all(path.parent().unwrap())?;
@@ -39,7 +49,7 @@ impl Profiler {
         let profiler = Profiler {
             event_sink,
             string_table,
-            start_time: Instant::now(),
+            counter,
         };
 
         let mut args = String::new();
@@ -49,13 +59,14 @@ impl Profiler {
         }
 
         profiler.string_table.alloc_metadata(&*format!(
-            r#"{{ "start_time": {}, "process_id": {}, "cmd": "{}" }}"#,
+            r#"{{ "start_time": {}, "process_id": {}, "cmd": "{}", "counter": {} }}"#,
             std::time::SystemTime::now()
                 .duration_since(std::time::UNIX_EPOCH)
                 .unwrap()
                 .as_nanos(),
             std::process::id(),
             args,
+            profiler.counter.describe_as_json(),
         ));
 
         Ok(profiler)
@@ -88,7 +99,7 @@ impl Profiler {
     /// automatically.
     pub fn record_instant_event(&self, event_kind: StringId, event_id: EventId, thread_id: u32) {
         let raw_event =
-            RawEvent::new_instant(event_kind, event_id, thread_id, self.nanos_since_start());
+            RawEvent::new_instant(event_kind, event_id, thread_id, self.counter.since_start());
 
         self.record_raw_event(&raw_event);
     }
@@ -107,7 +118,7 @@ impl Profiler {
             event_id,
             event_kind,
             thread_id,
-            start_ns: self.nanos_since_start(),
+            start_count: self.counter.since_start(),
         }
     }
 
@@ -117,10 +128,6 @@ impl Profiler {
                 raw_event.serialize(bytes);
             });
     }
-
-    fn nanos_since_start(&self) -> u64 {
-        self.start_time.elapsed().as_nanos() as _
-    }
 }
 
 /// When dropped, this `TimingGuard` will record an "end" event in the
@@ -131,7 +138,7 @@ pub struct TimingGuard<'a> {
     event_id: EventId,
     event_kind: StringId,
     thread_id: u32,
-    start_ns: u64,
+    start_count: u64,
 }
 
 impl<'a> Drop for TimingGuard<'a> {
@@ -141,8 +148,8 @@ impl<'a> Drop for TimingGuard<'a> {
             self.event_kind,
             self.event_id,
             self.thread_id,
-            self.start_ns,
-            self.profiler.nanos_since_start(),
+            self.start_count,
+            self.profiler.counter.since_start(),
         );
 
         self.profiler.record_raw_event(&raw_event);
diff --git a/measureme/src/raw_event.rs b/measureme/src/raw_event.rs
index d8aac65..d2a8d20 100644
--- a/measureme/src/raw_event.rs
+++ b/measureme/src/raw_event.rs
@@ -12,22 +12,26 @@ pub struct RawEvent {
     pub event_id: EventId,
     pub thread_id: u32,
 
-    // The following 96 bits store the start and the end timestamp, using
+    // The following 96 bits store the start and the end counter value, using
     // 48 bits for each.
+    // FIXME(eddyb) s/time/count/
     pub start_time_lower: u32,
+    // FIXME(eddyb) s/time/count/
     pub end_time_lower: u32,
     pub start_and_end_upper: u32,
 }
 
-/// `RawEvents` that have an end time stamp with this value are instant events.
-const INSTANT_TIMESTAMP_MARKER: u64 = 0xFFFF_FFFF_FFFF;
+/// `RawEvents` that have an end counter value with this value are instant events.
+const INSTANT_COUNT_MARKER: u64 = 0xFFFF_FFFF_FFFF;
 
-/// The max instant timestamp we can represent with the 48 bits available.
+/// The max instant counter value we can represent with the 48 bits available.
+// FIXME(eddyb) s/TIMESTAMP/COUNT/
 pub const MAX_INSTANT_TIMESTAMP: u64 = 0xFFFF_FFFF_FFFF;
 
-/// The max interval timestamp we can represent with the 48 bits available.
-/// The highest value is reserved for the `INSTANT_TIMESTAMP_MARKER`.
-pub const MAX_INTERVAL_TIMESTAMP: u64 = INSTANT_TIMESTAMP_MARKER - 1;
+/// The max interval counter value we can represent with the 48 bits available.
+/// The highest value is reserved for the `INSTANT_COUNT_MARKER`.
+// FIXME(eddyb) s/TIMESTAMP/COUNT/
+pub const MAX_INTERVAL_TIMESTAMP: u64 = INSTANT_COUNT_MARKER - 1;
 
 impl RawEvent {
     #[inline]
@@ -35,17 +39,17 @@ impl RawEvent {
         event_kind: StringId,
         event_id: EventId,
         thread_id: u32,
-        start_nanos: u64,
-        end_nanos: u64,
+        start_count: u64,
+        end_count: u64,
     ) -> RawEvent {
-        assert!(start_nanos <= end_nanos);
-        assert!(end_nanos <= MAX_INTERVAL_TIMESTAMP);
+        assert!(start_count <= end_count);
+        assert!(end_count <= MAX_INTERVAL_TIMESTAMP);
 
-        let start_time_lower = start_nanos as u32;
-        let end_time_lower = end_nanos as u32;
+        let start_time_lower = start_count as u32;
+        let end_time_lower = end_count as u32;
 
-        let start_time_upper = (start_nanos >> 16) as u32 & 0xFFFF_0000;
-        let end_time_upper = (end_nanos >> 32) as u32;
+        let start_time_upper = (start_count >> 16) as u32 & 0xFFFF_0000;
+        let end_time_upper = (end_count >> 32) as u32;
 
         let start_and_end_upper = start_time_upper | end_time_upper;
 
@@ -64,14 +68,14 @@ impl RawEvent {
         event_kind: StringId,
         event_id: EventId,
         thread_id: u32,
-        timestamp_ns: u64,
+        count: u64,
     ) -> RawEvent {
-        assert!(timestamp_ns <= MAX_INSTANT_TIMESTAMP);
+        assert!(count <= MAX_INSTANT_TIMESTAMP);
 
-        let start_time_lower = timestamp_ns as u32;
+        let start_time_lower = count as u32;
         let end_time_lower = 0xFFFF_FFFF;
 
-        let start_time_upper = (timestamp_ns >> 16) as u32;
+        let start_time_upper = (count >> 16) as u32;
         let start_and_end_upper = start_time_upper | 0x0000_FFFF;
 
         RawEvent {
@@ -85,18 +89,20 @@ impl RawEvent {
     }
 
     #[inline]
+    // FIXME(eddyb) s/nanos/count/
     pub fn start_nanos(&self) -> u64 {
         self.start_time_lower as u64 | (((self.start_and_end_upper & 0xFFFF_0000) as u64) << 16)
     }
 
     #[inline]
+    // FIXME(eddyb) s/nanos/count/
     pub fn end_nanos(&self) -> u64 {
         self.end_time_lower as u64 | (((self.start_and_end_upper & 0x0000_FFFF) as u64) << 32)
     }
 
     #[inline]
     pub fn is_instant(&self) -> bool {
-        self.end_nanos() == INSTANT_TIMESTAMP_MARKER
+        self.end_nanos() == INSTANT_COUNT_MARKER
     }
 
     #[inline]
@@ -206,24 +212,24 @@ mod tests {
 
     #[test]
     #[should_panic]
-    fn invalid_instant_timestamp() {
+    fn invalid_instant_count() {
         let _ = RawEvent::new_instant(
             StringId::INVALID,
             EventId::INVALID,
             123,
-            // timestamp too large
+            // count too large
             MAX_INSTANT_TIMESTAMP + 1,
         );
     }
 
     #[test]
     #[should_panic]
-    fn invalid_start_timestamp() {
+    fn invalid_start_count() {
         let _ = RawEvent::new_interval(
             StringId::INVALID,
             EventId::INVALID,
             123,
-            // start timestamp too large
+            // start count too large
             MAX_INTERVAL_TIMESTAMP + 1,
             MAX_INTERVAL_TIMESTAMP + 1,
         );
@@ -231,50 +237,50 @@ mod tests {
 
     #[test]
     #[should_panic]
-    fn invalid_end_timestamp() {
+    fn invalid_end_count() {
         let _ = RawEvent::new_interval(
             StringId::INVALID,
             EventId::INVALID,
             123,
             0,
-            // end timestamp too large
+            // end count too large
             MAX_INTERVAL_TIMESTAMP + 3,
         );
     }
 
     #[test]
     #[should_panic]
-    fn invalid_end_timestamp2() {
+    fn invalid_end_count2() {
         let _ = RawEvent::new_interval(
             StringId::INVALID,
             EventId::INVALID,
             123,
             0,
-            INSTANT_TIMESTAMP_MARKER,
+            INSTANT_COUNT_MARKER,
         );
     }
 
     #[test]
     #[should_panic]
-    fn start_greater_than_end_timestamp() {
+    fn start_greater_than_end_count() {
         let _ = RawEvent::new_interval(
             StringId::INVALID,
             EventId::INVALID,
             123,
-            // start timestamp greater than end timestamp
+            // start count greater than end count
             1,
             0,
         );
     }
 
     #[test]
-    fn start_equal_to_end_timestamp() {
+    fn start_equal_to_end_count() {
         // This is allowed, make sure we don't panic
         let _ = RawEvent::new_interval(StringId::INVALID, EventId::INVALID, 123, 1, 1);
     }
 
     #[test]
-    fn interval_timestamp_decoding() {
+    fn interval_count_decoding() {
         // Check the upper limits
         let e = RawEvent::new_interval(
             StringId::INVALID,
@@ -319,7 +325,7 @@ mod tests {
     }
 
     #[test]
-    fn instant_timestamp_decoding() {
+    fn instant_count_decoding() {
         assert_eq!(
             RawEvent::new_instant(StringId::INVALID, EventId::INVALID, 987, 0,).start_nanos(),
             0