From 32269f4a34a0a25c4e041807c49bee16e99e9b9c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 23 Apr 2019 14:58:01 -0700 Subject: [PATCH] Document unsafety in `UnitInterner` --- src/cargo/core/compiler/unit.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/cargo/core/compiler/unit.rs b/src/cargo/core/compiler/unit.rs index 3adc2b2ef03..bf7d6f0871c 100644 --- a/src/cargo/core/compiler/unit.rs +++ b/src/cargo/core/compiler/unit.rs @@ -92,6 +92,13 @@ impl<'a> fmt::Debug for Unit<'a> { } } +/// A small structure used to "intern" `Unit` values. +/// +/// A `Unit` is just a thin pointer to an internal `UnitInner`. This is done to +/// ensure that `Unit` itself is quite small as well as enabling a very +/// efficient hash/equality implementation for `Unit`. All units are +/// manufactured through an interner which guarantees that each equivalent value +/// is only produced once. pub struct UnitInterner<'a> { state: RefCell>, } @@ -101,6 +108,7 @@ struct InternerState<'a> { } impl<'a> UnitInterner<'a> { + /// Creates a new blank interner pub fn new() -> UnitInterner<'a> { UnitInterner { state: RefCell::new(InternerState { @@ -109,6 +117,9 @@ impl<'a> UnitInterner<'a> { } } + /// Creates a new `unit` from its components. The returned `Unit`'s fields + /// will all be equivalent to the provided arguments, although they may not + /// be the exact same instance. pub fn intern( &'a self, pkg: &'a Package, @@ -127,9 +138,30 @@ impl<'a> UnitInterner<'a> { Unit { inner } } + // Ok so interning here is a little unsafe, hence the usage of `unsafe` + // internally. The primary issue here is that we've got an internal cache of + // `UnitInner` instances added so far, but we may need to mutate it to add + // it, and the mutation for an interner happens behind a shared borrow. + // + // Our goal though is to escape the lifetime `borrow_mut` to the same + // lifetime as the borrowed passed into this function. That's where `unsafe` + // comes into play. What we're subverting here is resizing internally in the + // `HashSet` as well as overwriting previous keys in the `HashSet`. + // + // As a result we store `Box` internally to have an extra layer + // of indirection. That way `*const UnitInner` is a stable address that + // doesn't change with `HashSet` resizing. Furthermore we're careful to + // never overwrite an entry once inserted. + // + // Ideally we'd use an off-the-shelf interner from crates.io which avoids a + // small amount of unsafety here, but at the time this was written one + // wasn't obviously available. fn intern_inner(&'a self, item: &UnitInner<'a>) -> &'a UnitInner<'a> { let mut me = self.state.borrow_mut(); if let Some(item) = me.cache.get(item) { + // note that `item` has type `&Box`. Use `&**` to + // convert that to `&UnitInner<'a>`, then do some trickery to extend + // the lifetime to the `'a` on the function here. return unsafe { &*(&**item as *const UnitInner<'a>) }; } me.cache.insert(Box::new(item.clone()));