Skip to content

Commit

Permalink
Add new CheckMaybeUninit MIR transform
Browse files Browse the repository at this point in the history
This MIR transform inserts the same validity checks that
`mem::{uninitialized,zeroed}` perform into `MaybeUninit::{uninit,zeroed}().assume_init()`.

We have been panicking in `mem::uninitialized` on invalid values for quite some
time now, and it has helped to get people off the unsound API and
towards using `MaybeUninit<T>`.

While correct usage of `MaybeUninit<T>` is clearly documented, some
people still use it incorrectly and simply replaced their wrong
`mem::uninitialized` usage with `MaybeUninit::uninit().assume_init()`. This
is not any more correct than the old version, and we should still emit
panics in these cases. As this can't be done in the library only, we
need this MIR pass to insert the calls.

For now, it only detects direct usages of
`MaybeUninit::uninit().assume_init()` but it could be extended in the
future to do more advanced dataflow analysis.
  • Loading branch information
Noratrieb committed Feb 22, 2023
1 parent b869e84 commit 641251b
Show file tree
Hide file tree
Showing 15 changed files with 712 additions and 342 deletions.
4 changes: 4 additions & 0 deletions compiler/rustc_hir/src/lang_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,10 @@ language_item_table! {
RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None;

String, sym::String, string, Target::Struct, GenericRequirement::None;

// Lang item because the compiler inserts calls to it when uninit memory is used
AssertUninitValid, sym::assert_uninit_valid, assert_uninit_valid, Target::Fn, GenericRequirement::Exact(1);
AssertZeroValid, sym::assert_zero_valid, assert_zero_valid, Target::Fn, GenericRequirement::Exact(1);
}

pub enum GenericRequirement {
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_middle/src/mir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3022,6 +3022,11 @@ impl fmt::Debug for Location {
impl Location {
pub const START: Location = Location { block: START_BLOCK, statement_index: 0 };

/// Create a new location at the start of a basic block.
pub fn start_of_block(block: BasicBlock) -> Self {
Self { block, statement_index: 0 }
}

/// Returns the location immediately after this one within the enclosing block.
///
/// Note that if this location represents a terminator, then the
Expand Down
164 changes: 164 additions & 0 deletions compiler/rustc_mir_transform/src/check_maybe_uninit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
//! This pass inserts the same validity checks into `MaybeUninit::{uninit,zeroed}().assume_init()`
//! as in `mem::{uninitialized,zeroed}`.
//!
//! Note that this module uses `uninit` to mean `uninit` or `zeroed` unless `zeroed` is used explicitly.
//!
//! It does this by first finding a call to `MaybeUninit::uninit`, and then figuring out
//! whether the successor basic block is a call to `MaybeUninit::assume_init` on the same local.
use rustc_const_eval::interpret;
use rustc_hir::def_id::DefId;
use rustc_middle::mir::patch::MirPatch;
use rustc_middle::mir::{
BasicBlock, BasicBlockData, Body, Constant, ConstantKind, Operand, Place, SourceInfo,
Terminator, TerminatorKind,
};
use rustc_middle::ty::{self, List, SubstsRef, TyCtxt};
use rustc_span::{sym, Span};

use crate::MirPass;

/// MIR pass that routes a `MaybeUninit::{uninit,zeroed}()` call which is immediately followed
/// by `assume_init` on the same place through a call to the matching validity assertion.
pub struct CheckMaybeUninit;

impl<'tcx> MirPass<'tcx> for CheckMaybeUninit {
    /// Scans every basic block for a call terminator whose return block consists solely of an
    /// `assume_init` call on the place the first call just wrote to.
    ///
    /// When the first call is one of the recognised `MaybeUninit` constructors (see
    /// `get_init_check_def_id`), its return edge is redirected to a freshly created block that
    /// calls the matching validity assertion and then continues to the `assume_init` block.
    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
        // All edits are collected in a patch and applied once the scan is finished.
        let mut patch = MirPatch::new(body);

        for (mu_uninit_bb, mu_uninit_data) in body.basic_blocks.iter_enumerated() {
            let terminator = mu_uninit_data.terminator();

            let TerminatorKind::Call {
                func: mu_uninit_func,
                target: assume_init_bb,
                destination: uninit_place,
                ..
            } = &terminator.kind else {
                continue;
            };

            let Some((mu_method_def_id, substs)) = mu_uninit_func.const_fn_def() else {
                continue;
            };

            // A call without a return target has no successor block to inspect.
            let Some(assume_init_bb) = assume_init_bb else {
                continue;
            };

            let Some((assume_init_operand, assume_init_call_span)) =
                is_block_just_assume_init(tcx, &body.basic_blocks[*assume_init_bb])
            else {
                continue;
            };

            let Some(assume_init_place) = assume_init_operand.place() else {
                continue;
            };

            if assume_init_place != *uninit_place {
                // The calls here are a little sketchy, but the place that is assumed to be init
                // is not the place that was just created as uninit, so we conservatively bail out.
                continue;
            }

            // Select the right assertion intrinsic to call depending on which MaybeUninit method we called
            let Some(init_check_def_id) = get_init_check_def_id(tcx, mu_method_def_id) else {
                continue;
            };

            // The new block calls the assertion and then jumps to the original `assume_init` block.
            let assert_valid_bb = make_assert_valid_bb(
                &mut patch,
                tcx,
                assume_init_call_span,
                init_check_def_id,
                *assume_init_bb,
                substs,
            );

            // Re-emit the original call unchanged except for its return target.
            let mut new_uninit_terminator = terminator.kind.clone();
            match new_uninit_terminator {
                TerminatorKind::Call { ref mut target, .. } => {
                    *target = Some(assert_valid_bb);
                }
                _ => unreachable!("terminator must be TerminatorKind::Call as checked above"),
            }

            patch.patch_terminator(mu_uninit_bb, new_uninit_terminator);
        }

        patch.apply(body);
    }
}

/// Checks whether `block` is nothing but a call to the `assume_init` diagnostic item.
///
/// Returns the first argument of that call together with the call's `fn_span` when the block
/// has no statements and its terminator is such a call. Returns `None` otherwise, including
/// when the call carries no arguments.
fn is_block_just_assume_init<'tcx, 'blk>(
    tcx: TyCtxt<'tcx>,
    block: &'blk BasicBlockData<'tcx>,
) -> Option<(&'blk Operand<'tcx>, Span)> {
    if block.statements.is_empty()
        && let TerminatorKind::Call {
            func,
            args,
            fn_span,
            ..
        } = &block.terminator().kind
        && let Some((def_id, _)) = func.const_fn_def()
        && tcx.is_diagnostic_item(sym::assume_init, def_id)
    {
        // The first argument is the value `assume_init` is invoked on.
        args.first().map(|operand| (operand, *fn_span))
    } else {
        None
    }
}

/// Maps a `MaybeUninit` constructor to the lang item that asserts its result is valid.
///
/// The `maybe_uninit_uninit` diagnostic item maps to the `assert_uninit_valid` lang item and
/// `maybe_uninit_zeroed` maps to `assert_zero_valid`. Any other function yields `None`, as does
/// a lang item that is not defined.
fn get_init_check_def_id(tcx: TyCtxt<'_>, mu_method_def_id: DefId) -> Option<DefId> {
    if tcx.is_diagnostic_item(sym::maybe_uninit_uninit, mu_method_def_id) {
        return tcx.lang_items().assert_uninit_valid();
    }

    if tcx.is_diagnostic_item(sym::maybe_uninit_zeroed, mu_method_def_id) {
        return tcx.lang_items().assert_zero_valid();
    }

    None
}

/// Adds a new basic block to `patch` whose only content is a call to the assertion function
/// `init_check_def_id`, instantiated with `substs`.
///
/// The call takes no arguments and writes its unit result into a fresh temporary. It continues
/// to `target_bb` on return and uses the patch's resume block as its cleanup target. `fn_span`
/// is used for the temporary, the callee constant and the terminator's source info.
///
/// Returns the index of the newly created block.
fn make_assert_valid_bb<'tcx>(
    patch: &mut MirPatch<'tcx>,
    tcx: TyCtxt<'tcx>,
    fn_span: Span,
    init_check_def_id: DefId,
    target_bb: BasicBlock,
    substs: SubstsRef<'tcx>,
) -> BasicBlock {
    let func = make_fn_operand_for_assert_valid(tcx, init_check_def_id, fn_span, substs);

    // Destination for the assertion's `()` return value.
    let local = patch.new_temp(tcx.types.unit, fn_span);

    let terminator = TerminatorKind::Call {
        func,
        args: vec![],
        destination: Place { local, projection: List::empty() },
        target: Some(target_bb),
        cleanup: Some(patch.resume_block()),
        from_hir_call: true,
        fn_span,
    };

    let terminator = Terminator { source_info: SourceInfo::outermost(fn_span), kind: terminator };

    patch.new_block(BasicBlockData::new(Some(terminator)))
}

/// Builds the callee operand for a call to the function `def_id` instantiated with `substs`.
///
/// The operand is a zero-sized constant of the corresponding `FnDef` type, carrying `span` and
/// no user type annotation.
fn make_fn_operand_for_assert_valid<'tcx>(
    tcx: TyCtxt<'tcx>,
    def_id: DefId,
    span: Span,
    substs: SubstsRef<'tcx>,
) -> Operand<'tcx> {
    let callee_ty = tcx.mk_ty(ty::FnDef(def_id, substs));
    let literal = ConstantKind::Val(interpret::ConstValue::ZeroSized, callee_ty);
    let constant = Constant { span, literal, user_ty: None };

    Operand::Constant(Box::new(constant))
}
4 changes: 4 additions & 0 deletions compiler/rustc_mir_transform/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ mod add_call_guards;
mod add_moves_for_packed_drops;
mod add_retag;
mod check_const_item_mutation;
mod check_maybe_uninit;
mod check_packed_ref;
pub mod check_unsafety;
// This pass is public to allow external drivers to perform MIR cleanup
Expand Down Expand Up @@ -300,6 +301,7 @@ fn mir_const(tcx: TyCtxt<'_>, def: ty::WithOptConstParam<LocalDefId>) -> &Steal<
&Lint(check_const_item_mutation::CheckConstItemMutation),
&Lint(function_item_references::FunctionItemReferences),
// What we need to do constant evaluation.
&check_maybe_uninit::CheckMaybeUninit,
&simplify::SimplifyCfg::new("initial"),
&rustc_peek::SanityCheck, // Just a lint
],
Expand Down Expand Up @@ -497,6 +499,8 @@ fn run_analysis_to_runtime_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>
/// After this series of passes, no lifetime analysis based on borrowing can be done.
fn run_analysis_cleanup_passes<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
let passes: &[&dyn MirPass<'tcx>] = &[
// FIXME: Preferably we'd run this before const eval once the stability of the wrapper function is figured out
&check_maybe_uninit::CheckMaybeUninit,
&cleanup_post_borrowck::CleanupPostBorrowck,
&remove_noop_landing_pads::RemoveNoopLandingPads,
&simplify::SimplifyCfg::new("early-opt"),
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ symbols! {
assert_mem_uninitialized_valid,
assert_ne_macro,
assert_receiver_is_total_eq,
assert_uninit_valid,
assert_zero_valid,
asserting,
associated_const_equality,
Expand Down
46 changes: 46 additions & 0 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,14 @@ extern "rust-intrinsic" {
#[rustc_safe_intrinsic]
pub fn assert_mem_uninitialized_valid<T>();

/// A guard for `std::mem::uninitialized`. This will statically either panic, or do nothing.
///
/// This intrinsic does not have a stable counterpart.
#[rustc_const_unstable(feature = "const_assert_type2", issue = "none")]
#[rustc_safe_intrinsic]
#[cfg(bootstrap)]
pub fn assert_uninit_valid<T>();

/// Gets a reference to a static `Location` indicating where it was called.
///
/// Note that, unlike most intrinsics, this is safe to call;
Expand Down Expand Up @@ -2527,3 +2535,41 @@ pub const unsafe fn write_bytes<T>(dst: *mut T, val: u8, count: usize) {
write_bytes(dst, val, count)
}
}

// Wrappers around the init assertion intrinsics. Calls to these
// are inserted in the check_maybe_uninit mir pass

/// Const-callable wrapper that invokes the `assert_zero_valid` intrinsic for `T`.
///
/// Outside of bootstrap builds this function is registered as the `assert_zero_valid` lang
/// item. It is `#[track_caller]`.
#[unstable(
feature = "maybe_uninit_checks_internals",
issue = "none",
reason = "implementation detail of panics on invalid MaybeUninit usage"
)]
#[rustc_allow_const_fn_unstable(const_assert_type2)]
#[rustc_const_stable(
feature = "const_maybe_uninit_checks_internals",
since = "CURRENT_RUSTC_VERSION"
)]
#[cfg_attr(not(bootstrap), lang = "assert_zero_valid")]
#[track_caller]
pub const fn assert_zero_valid_wrapper<T>() {
assert_zero_valid::<T>();
}

/// Const-callable wrapper that invokes the uninit-validity assertion intrinsic for `T`.
///
/// Bootstrap builds call `assert_uninit_valid`; all other builds call
/// `assert_mem_uninitialized_valid`. Outside of bootstrap builds this function is registered
/// as the `assert_uninit_valid` lang item. It is `#[track_caller]`.
#[unstable(
feature = "maybe_uninit_checks_internals",
issue = "none",
reason = "implementation detail of panics on invalid MaybeUninit usage"
)]
#[rustc_allow_const_fn_unstable(const_assert_type2)]
#[rustc_const_stable(
feature = "const_maybe_uninit_checks_internals",
since = "CURRENT_RUSTC_VERSION"
)]
#[cfg_attr(not(bootstrap), lang = "assert_uninit_valid")]
#[track_caller]
pub const fn assert_uninit_valid_wrapper<T>() {
// NOTE(review): presumably the bootstrap compiler only provides the intrinsic under this
// name — confirm before removing the cfg split.
#[cfg(bootstrap)]
assert_uninit_valid::<T>();
#[cfg(not(bootstrap))]
assert_mem_uninitialized_valid::<T>();
}
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
#![feature(const_align_of_val_raw)]
#![feature(const_alloc_layout)]
#![feature(const_arguments_as_str)]
#![feature(const_assert_type2)]
#![feature(const_array_into_iter_constructors)]
#![feature(const_bigint_helper_methods)]
#![feature(const_black_box)]
Expand Down
4 changes: 4 additions & 0 deletions library/core/src/mem/maybe_uninit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,10 @@ impl<T> MaybeUninit<T> {
// This also means that `self` must be a `value` variant.
unsafe {
intrinsics::assert_inhabited::<T>();

// When this function is called directly after `MaybeUninit::<T>::uninit` or `MaybeUninit::<T>::zeroed`,
// the compiler inserts a call to `intrinsics::assert_uninit_valid_wrapper` or `intrinsics::assert_zero_valid_wrapper` respectively.

ManuallyDrop::into_inner(self.value)
}
}
Expand Down
90 changes: 90 additions & 0 deletions tests/mir-opt/check_maybe_uninit.main.CheckMaybeUninit.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
- // MIR for `main` before CheckMaybeUninit
+ // MIR for `main` after CheckMaybeUninit

| User Type Annotations
| 0: user_ty: Canonical { max_universe: U0, variables: [CanonicalVarInfo { kind: Ty(General(U0)) }], value: TypeOf(DefId(2:2022 ~ core[4f75]::mem::maybe_uninit::{impl#2}::uninit), UserSubsts { substs: [^0], user_self_ty: Some(UserSelfTy { impl_def_id: DefId(2:2019 ~ core[4f75]::mem::maybe_uninit::{impl#2}), self_ty: std::mem::MaybeUninit<u8> }) }) }, span: $DIR/check-maybe-uninit.rs:6:17: 6:42, inferred_ty: fn() -> std::mem::MaybeUninit<u8> {std::mem::MaybeUninit::<u8>::uninit}
| 1: user_ty: Canonical { max_universe: U0, variables: [CanonicalVarInfo { kind: Ty(General(U0)) }], value: TypeOf(DefId(2:2022 ~ core[4f75]::mem::maybe_uninit::{impl#2}::uninit), UserSubsts { substs: [^0], user_self_ty: Some(UserSelfTy { impl_def_id: DefId(2:2019 ~ core[4f75]::mem::maybe_uninit::{impl#2}), self_ty: std::mem::MaybeUninit<std::string::String> }) }) }, span: $DIR/check-maybe-uninit.rs:7:17: 7:46, inferred_ty: fn() -> std::mem::MaybeUninit<std::string::String> {std::mem::MaybeUninit::<std::string::String>::uninit}
|
fn main() -> () {
let mut _0: (); // return place in scope 0 at $DIR/check-maybe-uninit.rs:+0:11: +0:11
let mut _1: u8; // in scope 0 at $DIR/check-maybe-uninit.rs:+2:17: +2:58
let mut _2: std::mem::MaybeUninit<u8>; // in scope 0 at $DIR/check-maybe-uninit.rs:+2:17: +2:44
let mut _3: std::string::String; // in scope 0 at $DIR/check-maybe-uninit.rs:+3:17: +3:62
let mut _4: std::mem::MaybeUninit<std::string::String>; // in scope 0 at $DIR/check-maybe-uninit.rs:+3:17: +3:48
+ let mut _5: (); // in scope 0 at $DIR/check-maybe-uninit.rs:+2:45: +2:58
+ let mut _6: (); // in scope 0 at $DIR/check-maybe-uninit.rs:+3:49: +3:62
scope 1 {
scope 2 {
scope 3 {
}
}
}

bb0: {
StorageLive(_1); // scope 1 at $DIR/check-maybe-uninit.rs:+2:17: +2:58
StorageLive(_2); // scope 1 at $DIR/check-maybe-uninit.rs:+2:17: +2:44
- _2 = MaybeUninit::<u8>::uninit() -> [return: bb1, unwind: bb6]; // scope 1 at $DIR/check-maybe-uninit.rs:+2:17: +2:44
+ _2 = MaybeUninit::<u8>::uninit() -> [return: bb7, unwind: bb6]; // scope 1 at $DIR/check-maybe-uninit.rs:+2:17: +2:44
// mir::Constant
// + span: $DIR/check-maybe-uninit.rs:6:17: 6:42
// + user_ty: UserType(0)
// + literal: Const { ty: fn() -> MaybeUninit<u8> {MaybeUninit::<u8>::uninit}, val: Value(<ZST>) }
}

bb1: {
_1 = MaybeUninit::<u8>::assume_init(move _2) -> [return: bb2, unwind: bb6]; // scope 1 at $DIR/check-maybe-uninit.rs:+2:17: +2:58
// mir::Constant
// + span: $DIR/check-maybe-uninit.rs:6:45: 6:56
// + literal: Const { ty: unsafe fn(MaybeUninit<u8>) -> u8 {MaybeUninit::<u8>::assume_init}, val: Value(<ZST>) }
}

bb2: {
StorageDead(_2); // scope 1 at $DIR/check-maybe-uninit.rs:+2:57: +2:58
StorageDead(_1); // scope 1 at $DIR/check-maybe-uninit.rs:+2:58: +2:59
StorageLive(_3); // scope 2 at $DIR/check-maybe-uninit.rs:+3:17: +3:62
StorageLive(_4); // scope 2 at $DIR/check-maybe-uninit.rs:+3:17: +3:48
- _4 = MaybeUninit::<String>::uninit() -> [return: bb3, unwind: bb6]; // scope 2 at $DIR/check-maybe-uninit.rs:+3:17: +3:48
+ _4 = MaybeUninit::<String>::uninit() -> [return: bb8, unwind: bb6]; // scope 2 at $DIR/check-maybe-uninit.rs:+3:17: +3:48
// mir::Constant
// + span: $DIR/check-maybe-uninit.rs:7:17: 7:46
// + user_ty: UserType(1)
// + literal: Const { ty: fn() -> MaybeUninit<String> {MaybeUninit::<String>::uninit}, val: Value(<ZST>) }
}

bb3: {
_3 = MaybeUninit::<String>::assume_init(move _4) -> [return: bb4, unwind: bb6]; // scope 2 at $DIR/check-maybe-uninit.rs:+3:17: +3:62
// mir::Constant
// + span: $DIR/check-maybe-uninit.rs:7:49: 7:60
// + literal: Const { ty: unsafe fn(MaybeUninit<String>) -> String {MaybeUninit::<String>::assume_init}, val: Value(<ZST>) }
}

bb4: {
StorageDead(_4); // scope 2 at $DIR/check-maybe-uninit.rs:+3:61: +3:62
drop(_3) -> [return: bb5, unwind: bb6]; // scope 2 at $DIR/check-maybe-uninit.rs:+3:62: +3:63
}

bb5: {
StorageDead(_3); // scope 2 at $DIR/check-maybe-uninit.rs:+3:62: +3:63
_0 = const (); // scope 1 at $DIR/check-maybe-uninit.rs:+1:5: +4:6
return; // scope 0 at $DIR/check-maybe-uninit.rs:+5:2: +5:2
}

bb6 (cleanup): {
resume; // scope 0 at $DIR/check-maybe-uninit.rs:+0:1: +5:2
+ }
+
+ bb7: {
+ _5 = <ZST>: fn() {assert_uninit_valid_wrapper::<u8>}() -> [return: bb1, unwind: bb6]; // scope 0 at $DIR/check-maybe-uninit.rs:+2:45: +2:58
+ // mir::Constant
+ // + span: $DIR/check-maybe-uninit.rs:6:45: 6:58
+ // + literal: Const { ty: fn() {assert_uninit_valid_wrapper::<u8>}, val: Value(ValTree::Branch(..)) }
+ }
+
+ bb8: {
+ _6 = <ZST>: fn() {assert_uninit_valid_wrapper::<String>}() -> [return: bb3, unwind: bb6]; // scope 0 at $DIR/check-maybe-uninit.rs:+3:49: +3:62
+ // mir::Constant
+ // + span: $DIR/check-maybe-uninit.rs:7:49: 7:62
+ // + literal: Const { ty: fn() {assert_uninit_valid_wrapper::<String>}, val: Value(ValTree::Branch(..)) }
}
}

9 changes: 9 additions & 0 deletions tests/mir-opt/check_maybe_uninit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use std::mem::MaybeUninit;

// EMIT_MIR check_maybe_uninit.main.CheckMaybeUninit.diff
fn main() {
unsafe {
let _ = MaybeUninit::<u8>::uninit().assume_init();
let _ = MaybeUninit::<String>::uninit().assume_init();
}
}
Loading

0 comments on commit 641251b

Please sign in to comment.