Skip to content

Commit

Permalink
Rollup merge of rust-lang#101101 - RalfJung:read-pointer-as-bytes, r=…
Browse files Browse the repository at this point in the history
…oli-obk

interpret: make read-pointer-as-bytes a CTFE-only error with extra information

Next step in the reaction to rust-lang#99923. Also teaches Miri to implicitly strip provenance in more situations when transmuting pointers to integers, which fixes rust-lang/miri#2456.

Pointer-to-int transmutation during CTFE now produces a message like this:
```
   = help: this code performed an operation that depends on the underlying bytes representing a pointer
   = help: the absolute address of a pointer is not known at compile-time, so such operations are not supported
```

r? ``@oli-obk``
  • Loading branch information
Dylan-DPC authored Aug 30, 2022
2 parents 1ea8496 + c46e803 commit 81f3841
Show file tree
Hide file tree
Showing 46 changed files with 629 additions and 334 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_cranelift/src/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
data_ctx.define(bytes.into_boxed_slice());

for &(offset, alloc_id) in alloc.relocations().iter() {
for &(offset, alloc_id) in alloc.provenance().iter() {
let addend = {
let endianness = tcx.data_layout.endian;
let offset = offset.bytes() as usize;
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
let size = Size::from_bytes(
4 * ret_lane_count, /* size_of([u32; ret_lane_count]) */
);
alloc.inner().get_bytes(fx, alloc_range(offset, size)).unwrap()
alloc
.inner()
.get_bytes_strip_provenance(fx, alloc_range(offset, size))
.unwrap()
}
_ => unreachable!("{:?}", idx_const),
};
Expand Down
12 changes: 6 additions & 6 deletions compiler/rustc_codegen_gcc/src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
//
// We could remove this hack whenever we decide to drop macOS 10.10 support.
if self.tcx.sess.target.options.is_like_osx {
// The `inspect` method is okay here because we checked relocations, and
// The `inspect` method is okay here because we checked for provenance, and
// because we are doing this access to inspect the final interpreter state
// (not as part of the interpreter execution).
//
Expand Down Expand Up @@ -296,17 +296,17 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {

pub fn const_alloc_to_gcc<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, alloc: ConstAllocation<'tcx>) -> RValue<'gcc> {
let alloc = alloc.inner();
let mut llvals = Vec::with_capacity(alloc.relocations().len() + 1);
let mut llvals = Vec::with_capacity(alloc.provenance().len() + 1);
let dl = cx.data_layout();
let pointer_size = dl.pointer_size.bytes() as usize;

let mut next_offset = 0;
for &(offset, alloc_id) in alloc.relocations().iter() {
for &(offset, alloc_id) in alloc.provenance().iter() {
let offset = offset.bytes();
assert_eq!(offset as usize as u64, offset);
let offset = offset as usize;
if offset > next_offset {
// This `inspect` is okay since we have checked that it is not within a relocation, it
// This `inspect` is okay since we have checked that it is not within a pointer with provenance, it
// is within the bounds of the allocation, and it doesn't affect interpreter execution
// (we inspect the result after interpreter execution). Any undef byte is replaced with
// some arbitrary byte value.
Expand All @@ -319,7 +319,7 @@ pub fn const_alloc_to_gcc<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, alloc: ConstAl
read_target_uint( dl.endian,
// This `inspect` is okay since it is within the bounds of the allocation, it doesn't
// affect interpreter execution (we inspect the result after interpreter execution),
// and we properly interpret the relocation as a relocation pointer offset.
// and we properly interpret the provenance as a relocation pointer offset.
alloc.inspect_with_uninit_and_ptr_outside_interpreter(offset..(offset + pointer_size)),
)
.expect("const_alloc_to_llvm: could not read relocation pointer")
Expand All @@ -336,7 +336,7 @@ pub fn const_alloc_to_gcc<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, alloc: ConstAl
}
if alloc.len() >= next_offset {
let range = next_offset..alloc.len();
// This `inspect` is okay since we have check that it is after all relocations, it is
// This `inspect` is okay since we have check that it is after all provenance, it is
// within the bounds of the allocation, and it doesn't affect interpreter execution (we
// inspect the result after interpreter execution). Any undef byte is replaced with some
// arbitrary byte value.
Expand Down
22 changes: 11 additions & 11 deletions compiler/rustc_codegen_llvm/src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ use tracing::debug;

pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: ConstAllocation<'_>) -> &'ll Value {
let alloc = alloc.inner();
let mut llvals = Vec::with_capacity(alloc.relocations().len() + 1);
let mut llvals = Vec::with_capacity(alloc.provenance().len() + 1);
let dl = cx.data_layout();
let pointer_size = dl.pointer_size.bytes() as usize;

// Note: this function may call `inspect_with_uninit_and_ptr_outside_interpreter`,
// so `range` must be within the bounds of `alloc` and not contain or overlap a relocation.
// Note: this function may call `inspect_with_uninit_and_ptr_outside_interpreter`, so `range`
// must be within the bounds of `alloc` and not contain or overlap a pointer provenance.
fn append_chunks_of_init_and_uninit_bytes<'ll, 'a, 'b>(
llvals: &mut Vec<&'ll Value>,
cx: &'a CodegenCx<'ll, 'b>,
Expand Down Expand Up @@ -79,12 +79,12 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: ConstAllocation<
}

let mut next_offset = 0;
for &(offset, alloc_id) in alloc.relocations().iter() {
for &(offset, alloc_id) in alloc.provenance().iter() {
let offset = offset.bytes();
assert_eq!(offset as usize as u64, offset);
let offset = offset as usize;
if offset > next_offset {
// This `inspect` is okay since we have checked that it is not within a relocation, it
// This `inspect` is okay since we have checked that there is no provenance, it
// is within the bounds of the allocation, and it doesn't affect interpreter execution
// (we inspect the result after interpreter execution).
append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, next_offset..offset);
Expand All @@ -93,7 +93,7 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: ConstAllocation<
dl.endian,
// This `inspect` is okay since it is within the bounds of the allocation, it doesn't
// affect interpreter execution (we inspect the result after interpreter execution),
// and we properly interpret the relocation as a relocation pointer offset.
// and we properly interpret the provenance as a relocation pointer offset.
alloc.inspect_with_uninit_and_ptr_outside_interpreter(offset..(offset + pointer_size)),
)
.expect("const_alloc_to_llvm: could not read relocation pointer")
Expand Down Expand Up @@ -121,7 +121,7 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: ConstAllocation<
}
if alloc.len() >= next_offset {
let range = next_offset..alloc.len();
// This `inspect` is okay since we have check that it is after all relocations, it is
// This `inspect` is okay since we have check that it is after all provenance, it is
// within the bounds of the allocation, and it doesn't affect interpreter execution (we
// inspect the result after interpreter execution).
append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, range);
Expand Down Expand Up @@ -479,15 +479,15 @@ impl<'ll> StaticMethods for CodegenCx<'ll, '_> {
//
// We could remove this hack whenever we decide to drop macOS 10.10 support.
if self.tcx.sess.target.is_like_osx {
// The `inspect` method is okay here because we checked relocations, and
// The `inspect` method is okay here because we checked for provenance, and
// because we are doing this access to inspect the final interpreter state
// (not as part of the interpreter execution).
//
// FIXME: This check requires that the (arbitrary) value of undefined bytes
// happens to be zero. Instead, we should only check the value of defined bytes
// and set all undefined bytes to zero if this allocation is headed for the
// BSS.
let all_bytes_are_zero = alloc.relocations().is_empty()
let all_bytes_are_zero = alloc.provenance().is_empty()
&& alloc
.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len())
.iter()
Expand All @@ -511,9 +511,9 @@ impl<'ll> StaticMethods for CodegenCx<'ll, '_> {
section.as_str().as_ptr().cast(),
section.as_str().len() as c_uint,
);
assert!(alloc.relocations().is_empty());
assert!(alloc.provenance().is_empty());

// The `inspect` method is okay here because we checked relocations, and
// The `inspect` method is okay here because we checked for provenance, and
// because we are doing this access to inspect the final interpreter state (not
// as part of the interpreter execution).
let bytes =
Expand Down
13 changes: 13 additions & 0 deletions compiler/rustc_const_eval/src/const_eval/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use rustc_span::{Span, Symbol};
use super::InterpCx;
use crate::interpret::{
struct_error, ErrorHandled, FrameInfo, InterpError, InterpErrorInfo, Machine, MachineStopType,
UnsupportedOpInfo,
};

/// The CTFE machine has some custom error kinds.
Expand Down Expand Up @@ -149,6 +150,18 @@ impl<'tcx> ConstEvalErr<'tcx> {
if let Some(span_msg) = span_msg {
err.span_label(self.span, span_msg);
}
// Add some more context for select error types.
match self.error {
InterpError::Unsupported(
UnsupportedOpInfo::ReadPointerAsBytes
| UnsupportedOpInfo::PartialPointerOverwrite(_)
| UnsupportedOpInfo::PartialPointerCopy(_),
) => {
err.help("this code performed an operation that depends on the underlying bytes representing a pointer");
err.help("the absolute address of a pointer is not known at compile-time, so such operations are not supported");
}
_ => {}
}
// Add spans for the stacktrace. Don't print a single-line backtrace though.
if self.stacktrace.len() > 1 {
// Helper closure to print duplicated lines.
Expand Down
8 changes: 5 additions & 3 deletions compiler/rustc_const_eval/src/const_eval/eval_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use super::{CompileTimeEvalContext, CompileTimeInterpreter, ConstEvalErr};
use crate::interpret::eval_nullary_intrinsic;
use crate::interpret::{
intern_const_alloc_recursive, Allocation, ConstAlloc, ConstValue, CtfeValidationMode, GlobalId,
Immediate, InternKind, InterpCx, InterpResult, MPlaceTy, MemoryKind, OpTy, RefTracking,
StackPopCleanup,
Immediate, InternKind, InterpCx, InterpError, InterpResult, MPlaceTy, MemoryKind, OpTy,
RefTracking, StackPopCleanup,
};

use rustc_hir::def::DefKind;
Expand Down Expand Up @@ -387,7 +387,9 @@ pub fn eval_to_allocation_raw_provider<'tcx>(
ecx.tcx,
"it is undefined behavior to use this value",
|diag| {
diag.note(NOTE_ON_UNDEFINED_BEHAVIOR_ERROR);
if matches!(err.error, InterpError::UndefinedBehavior(_)) {
diag.note(NOTE_ON_UNDEFINED_BEHAVIOR_ERROR);
}
diag.note(&format!(
"the raw bytes of the constant ({}",
display_allocation(
Expand Down
14 changes: 7 additions & 7 deletions compiler/rustc_const_eval/src/interpret/intern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ fn intern_shallow<'rt, 'mir, 'tcx, M: CompileTimeMachine<'mir, 'tcx, const_eval:
alloc.mutability = Mutability::Not;
};
// link the alloc id to the actual allocation
leftover_allocations.extend(alloc.relocations().iter().map(|&(_, alloc_id)| alloc_id));
leftover_allocations.extend(alloc.provenance().iter().map(|&(_, alloc_id)| alloc_id));
let alloc = tcx.intern_const_alloc(alloc);
tcx.set_alloc_id_memory(alloc_id, alloc);
None
Expand Down Expand Up @@ -191,10 +191,10 @@ impl<'rt, 'mir, 'tcx: 'mir, M: CompileTimeMachine<'mir, 'tcx, const_eval::Memory
return Ok(true);
};

// If there are no relocations in this allocation, it does not contain references
// If there is no provenance in this allocation, it does not contain references
// that point to another allocation, and we can avoid the interning walk.
if let Some(alloc) = self.ecx.get_ptr_alloc(mplace.ptr, size, align)? {
if !alloc.has_relocations() {
if !alloc.has_provenance() {
return Ok(false);
}
} else {
Expand Down Expand Up @@ -233,8 +233,8 @@ impl<'rt, 'mir, 'tcx: 'mir, M: CompileTimeMachine<'mir, 'tcx, const_eval::Memory
}

fn visit_value(&mut self, mplace: &MPlaceTy<'tcx>) -> InterpResult<'tcx> {
// Handle Reference types, as these are the only relocations supported by const eval.
// Raw pointers (and boxes) are handled by the `leftover_relocations` logic.
// Handle Reference types, as these are the only types with provenance supported by const eval.
// Raw pointers (and boxes) are handled by the `leftover_allocations` logic.
let tcx = self.ecx.tcx;
let ty = mplace.layout.ty;
if let ty::Ref(_, referenced_ty, ref_mutability) = *ty.kind() {
Expand Down Expand Up @@ -410,7 +410,7 @@ pub fn intern_const_alloc_recursive<
// references and a `leftover_allocations` set (where we only have a todo-list here).
// So we hand-roll the interning logic here again.
match intern_kind {
// Statics may contain mutable allocations even behind relocations.
// Statics may point to mutable allocations.
// Even for immutable statics it would be ok to have mutable allocations behind
// raw pointers, e.g. for `static FOO: *const AtomicUsize = &AtomicUsize::new(42)`.
InternKind::Static(_) => {}
Expand Down Expand Up @@ -441,7 +441,7 @@ pub fn intern_const_alloc_recursive<
}
let alloc = tcx.intern_const_alloc(alloc);
tcx.set_alloc_id_memory(alloc_id, alloc);
for &(_, alloc_id) in alloc.inner().relocations().iter() {
for &(_, alloc_id) in alloc.inner().provenance().iter() {
if leftover_allocations.insert(alloc_id) {
todo.push(alloc_id);
}
Expand Down
21 changes: 17 additions & 4 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -687,10 +687,23 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?;
assert!(!layout.is_unsized());

let lhs = self.read_pointer(lhs)?;
let rhs = self.read_pointer(rhs)?;
let lhs_bytes = self.read_bytes_ptr(lhs, layout.size)?;
let rhs_bytes = self.read_bytes_ptr(rhs, layout.size)?;
let get_bytes = |this: &InterpCx<'mir, 'tcx, M>,
op: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
size|
-> InterpResult<'tcx, &[u8]> {
let ptr = this.read_pointer(op)?;
let Some(alloc_ref) = self.get_ptr_alloc(ptr, size, Align::ONE)? else {
// zero-sized access
return Ok(&[]);
};
if alloc_ref.has_provenance() {
throw_ub_format!("`raw_eq` on bytes with provenance");
}
alloc_ref.get_bytes_strip_provenance()
};

let lhs_bytes = get_bytes(self, lhs, layout.size)?;
let rhs_bytes = get_bytes(self, rhs, layout.size)?;
Ok(Scalar::from_bool(lhs_bytes == rhs_bytes))
}
}
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
/// cache the result. (This relies on `AllocMap::get_or` being able to add the
/// owned allocation to the map even when the map is shared.)
///
/// This must only fail if `alloc` contains relocations.
/// This must only fail if `alloc` contains provenance.
fn adjust_allocation<'b>(
ecx: &InterpCx<'mir, 'tcx, Self>,
id: AllocId,
Expand Down
Loading

0 comments on commit 81f3841

Please sign in to comment.