Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Require object reference to be aligned #1159

Merged
merged 8 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions benches/mock_bench/sft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ use criterion::Criterion;
use mmtk::memory_manager;
use mmtk::util::test_util::fixtures::*;
use mmtk::util::test_util::mock_vm::*;
use mmtk::vm::ObjectModel;
use mmtk::vm::VMBinding;
use mmtk::AllocationSemantics;

pub fn bench(c: &mut Criterion) {
let mut fixture = MutatorFixture::create();
let addr = memory_manager::alloc(&mut fixture.mutator, 8, 8, 0, AllocationSemantics::Default);
let obj = <MockVM as VMBinding>::VMObjectModel::address_to_ref(addr);
let obj = MockVM::object_start_to_ref(addr);

c.bench_function("sft read", |b| {
b.iter(|| memory_manager::is_in_mmtk_spaces::<MockVM>(black_box(obj)))
Expand Down
5 changes: 2 additions & 3 deletions docs/dummyvm/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,6 @@ pub extern "C" fn mmtk_get_malloc_bytes() -> usize {
#[cfg(test)]
mod tests {
use super::*;
use crate::mmtk::vm::ObjectModel;
use std::ffi::CString;

#[test]
Expand Down Expand Up @@ -293,8 +292,8 @@ mod tests {
let addr = mmtk_alloc(mutator, 16, 8, 0, mmtk::AllocationSemantics::Default);
assert!(!addr.is_zero());

// Turn the allocation address into the object reference
let obj = crate::object_model::VMObjectModel::address_to_ref(addr);
// Turn the allocation address into the object reference.
let obj = DummyVM::object_start_to_ref(addr);

// Post allocation
mmtk_post_alloc(mutator, obj, 16, mmtk::AllocationSemantics::Default);
Expand Down
13 changes: 13 additions & 0 deletions docs/dummyvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ impl VMBinding for DummyVM {
const MAX_ALIGNMENT: usize = 1 << 6;
}

use mmtk::util::{Address, ObjectReference};

impl DummyVM {
    /// Turn the start address of an allocation into the `ObjectReference` for the object placed
    /// there, by adding `OBJECT_REF_OFFSET` to `start`.
    pub fn object_start_to_ref(start: Address) -> ObjectReference {
        let ref_addr = start + crate::object_model::OBJECT_REF_OFFSET;
        // SAFETY: `start` is an allocation result, so it is not expected to be zero, and adding
        // the unsigned `OBJECT_REF_OFFSET` should keep the resulting address non-zero.
        unsafe { ObjectReference::from_raw_address_unchecked(ref_addr) }
    }
}

pub static SINGLETON: OnceLock<Box<MMTK<DummyVM>>> = OnceLock::new();

fn mmtk() -> &'static MMTK<DummyVM> {
Expand Down
26 changes: 9 additions & 17 deletions docs/dummyvm/src/object_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ use mmtk::vm::*;

pub struct VMObjectModel {}

// This is the offset from the allocation result to the object reference for the object.
// The binding can set this to a different value if the ObjectReference in the VM has an offset from the allocation starting address.
// Many methods like `address_to_ref` and `ref_to_address` use this constant.
// For bindings that this offset is not a constant, you can implement the calculation in the methods, and
// remove this constant.
/// This is the offset from the allocation result to the object reference for the object.
/// For bindings where this offset is not a constant, you can implement the calculation in the method `ref_to_object_start`, and
/// remove this constant.
pub const OBJECT_REF_OFFSET: usize = 0;

/// This is the offset from the object reference to an in-object address. The binding needs
/// to guarantee the in-object address is inside the storage associated with the object.
/// It has to be a constant offset. See `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`.
pub const IN_OBJECT_ADDRESS_OFFSET: isize = 0;

// This is the offset from the object reference to the object header.
// This value is used in `ref_to_header` where MMTk loads header metadata from.
pub const OBJECT_HEADER_OFFSET: usize = 0;
Expand Down Expand Up @@ -83,18 +86,7 @@ impl ObjectModel<DummyVM> for VMObjectModel {
object.to_raw_address().sub(OBJECT_HEADER_OFFSET)
}

fn ref_to_address(object: ObjectReference) -> Address {
// This method should return an address that is within the allocation.
// Using `ref_to_object_start` is always correct here.
// However, a binding may optimize this method to make it more efficient.
Self::ref_to_object_start(object)
}

fn address_to_ref(addr: Address) -> ObjectReference {
// This is the reverse operation of `ref_to_address`.
// If the implementation of `ref_to_address` is changed, this implementation needs to be changed accordingly.
unsafe { ObjectReference::from_raw_address_unchecked(addr.add(OBJECT_REF_OFFSET)) }
}
const IN_OBJECT_ADDRESS_OFFSET: isize = IN_OBJECT_ADDRESS_OFFSET;

fn dump_object(_object: ObjectReference) {
unimplemented!()
Expand Down
19 changes: 19 additions & 0 deletions docs/userguide/src/migration/prefix.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,25 @@ Notes for the mmtk-core developers:

<!-- Insert new versions here -->

## 0.27.0

### Introduce `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`

```admonish tldr
We used to have `ObjectModel::ref_to_address` and `ObjectModel::address_to_ref`, and require
the object reference and the in-object address to have a constant offset. Now, the two methods
are removed, and replaced with a constant `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`.
```

API changes:
* trait `ObjectModel`
- The methods `ref_to_address` and `address_to_ref` are removed.
- Users are required to specify `IN_OBJECT_ADDRESS_OFFSET` instead, which is the offset from the object
reference to the in-object address (the in-object address was the return value for the old `ref_to_address()`).
* type `ObjectReference`
- Add a constant `ALIGNMENT` which is equal to the word size. All object references should be at least aligned
to the word size. This is checked in debug builds when an `ObjectReference` is constructed.

## 0.26.0

### Rename "edge" to "slot"
Expand Down
10 changes: 2 additions & 8 deletions src/memory_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,6 @@ pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
/// It is the byte granularity of the valid object (VO) bit.
/// 3. Return false otherwise. This function never panics.
///
/// Case 2 means **this function is imprecise for misaligned addresses**.
/// This function uses the "valid object (VO) bits" side metadata, i.e. a bitmap.
/// For space efficiency, each bit of the bitmap governs a small region of memory.
/// The size of a region is currently defined as the [minimum object size](crate::util::constants::MIN_OBJECT_SIZE),
Expand All @@ -606,13 +605,8 @@ pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
/// The alignment of a region is also the region size.
/// If a VO bit is `1`, the bitmap cannot tell which address within the 4-byte or 8-byte region
/// is the valid object reference.
/// Therefore, if the input `addr` is not properly aligned, but is close to a valid object
/// reference, this function may still return true.
///
/// For the reason above, the VM **must check if `addr` is properly aligned** before calling this
/// function. For most VMs, valid object references are always aligned to the word size, so
/// checking `addr.is_aligned_to(BYTES_IN_WORD)` should usually work. If you are paranoid, you can
/// always check against [`crate::util::is_mmtk_object::VO_BIT_REGION_SIZE`].
/// Therefore, if this method returns true, the binding can compute the object reference by
/// aligning the address to [`crate::util::ObjectReference::ALIGNMENT`].
///
/// This function is useful for conservative root scanning. The VM can iterate through all words in
/// a stack, filter out zeros, misaligned words, obviously out-of-range words (such as addresses
Expand Down
50 changes: 37 additions & 13 deletions src/util/address.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,11 @@ impl Address {
Address(self.0 - size)
}

/// Apply a signed offset to the address. A negative `offset` moves the address downwards.
/// The arithmetic wraps around on overflow instead of panicking.
pub const fn offset(self, offset: isize) -> Address {
    let shifted = self.0.wrapping_add_signed(offset);
    Address(shifted)
}

/// Bitwise 'and' with a mask.
pub const fn and(self, mask: usize) -> usize {
self.0 & mask
Expand Down Expand Up @@ -486,17 +491,23 @@ use crate::vm::VMBinding;
/// `usize`. For the convenience of passing `Option<ObjectReference>` to and from native (C/C++)
/// programs, mmtk-core provides [`crate::util::api_util::NullableObjectReference`].
///
/// Note that [`ObjectReference`] has to be word aligned.
///
/// [NPO]: https://doc.rust-lang.org/std/option/index.html#representation
#[repr(transparent)]
#[derive(Copy, Clone, Eq, Hash, PartialOrd, Ord, PartialEq, NoUninit)]
pub struct ObjectReference(NonZeroUsize);

impl ObjectReference {
/// The required minimal alignment for object reference. If the object reference's raw address is not aligned to this value,
/// you will see an assertion failure in the debug build when constructing an object reference instance.
pub const ALIGNMENT: usize = crate::util::constants::BYTES_IN_ADDRESS;

/// Cast the object reference to its raw address. This method is mostly for the convenience of a binding.
///
/// MMTk should not make any assumption on the actual location of the address with the object reference.
/// MMTk should not assume the address returned by this method is in our allocation. For the purposes of
/// setting object metadata, MMTk should use [`crate::vm::ObjectModel::ref_to_address()`] or [`crate::vm::ObjectModel::ref_to_header()`].
/// setting object metadata, MMTk should use [`crate::util::ObjectReference::to_address`] or [`crate::util::ObjectReference::to_header`].
pub fn to_raw_address(self) -> Address {
Address(self.0.get())
}
Expand All @@ -506,9 +517,13 @@ impl ObjectReference {
///
/// If `addr` is 0, the result is `None`.
///
/// MMTk should not assume an arbitrary address can be turned into an object reference. MMTk can use [`crate::vm::ObjectModel::address_to_ref()`]
/// to turn addresses that are from [`crate::vm::ObjectModel::ref_to_address()`] back to object.
/// MMTk should not assume an arbitrary address can be turned into an object reference. MMTk can use [`crate::util::ObjectReference::from_address`]
/// to turn addresses that are from [`crate::util::ObjectReference::to_address`] back to object.
pub fn from_raw_address(addr: Address) -> Option<ObjectReference> {
debug_assert!(
addr.is_aligned_to(Self::ALIGNMENT),
"ObjectReference is required to be word aligned"
);
NonZeroUsize::new(addr.0).map(ObjectReference)
}

Expand All @@ -522,16 +537,19 @@ impl ObjectReference {
/// adding a positive offset to a non-zero address, we know the result must not be zero.
pub unsafe fn from_raw_address_unchecked(addr: Address) -> ObjectReference {
// Debug builds only: catch callers that break the non-zero precondition.
debug_assert!(!addr.is_zero());
// Debug builds only: enforce the `Self::ALIGNMENT` requirement on the raw address.
debug_assert!(
addr.is_aligned_to(Self::ALIGNMENT),
"ObjectReference is required to be word aligned"
);
// SAFETY: the caller must guarantee `addr` is non-zero, which is what `new_unchecked` requires.
ObjectReference(NonZeroUsize::new_unchecked(addr.0))
}

/// Get the in-heap address from an object reference. This method is used by MMTk to get an in-heap address
/// for an object reference. This method is syntactic sugar for [`crate::vm::ObjectModel::ref_to_address`]. See the
/// comments on [`crate::vm::ObjectModel::ref_to_address`].
/// for an object reference.
pub fn to_address<VM: VMBinding>(self) -> Address {
use crate::vm::ObjectModel;
let to_address = VM::VMObjectModel::ref_to_address(self);
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || to_address == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_address() returns {}", self, to_address);
let to_address = Address(self.0.get()).offset(VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET);
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || to_address == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, in-object addr is {}", self, to_address);
to_address
}

Expand All @@ -549,17 +567,23 @@ impl ObjectReference {
pub fn to_object_start<VM: VMBinding>(self) -> Address {
use crate::vm::ObjectModel;
let object_start = VM::VMObjectModel::ref_to_object_start(self);
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || object_start == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_address() returns {}", self, object_start);
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || object_start == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_object_start() returns {}", self, object_start);
object_start
}

/// Get the object reference from an address that is returned from [`crate::util::address::ObjectReference::to_address`]
/// or [`crate::vm::ObjectModel::ref_to_address`]. This method is syntactic sugar for [`crate::vm::ObjectModel::address_to_ref`].
/// See the comments on [`crate::vm::ObjectModel::address_to_ref`].
/// Get the object reference from an address that is returned from [`crate::util::address::ObjectReference::to_address`].
pub fn from_address<VM: VMBinding>(addr: Address) -> ObjectReference {
use crate::vm::ObjectModel;
let obj = VM::VMObjectModel::address_to_ref(addr);
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, address_to_ref() returns {}", addr, obj);
let obj = unsafe {
ObjectReference::from_raw_address_unchecked(
addr.offset(-VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET),
wks marked this conversation as resolved.
Show resolved Hide resolved
)
};
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, the object reference is {}", addr, obj);
debug_assert!(
obj.to_raw_address().is_aligned_to(Self::ALIGNMENT),
"ObjectReference is required to be word aligned"
);
obj
}

Expand Down
8 changes: 3 additions & 5 deletions src/util/test_util/fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ pub struct SingleObject {

impl FixtureContent for SingleObject {
fn create() -> Self {
use crate::vm::object_model::ObjectModel;
let mut mutator = MutatorFixture::create();

// A relatively small object, typical for Ruby.
Expand All @@ -232,7 +231,7 @@ impl FixtureContent for SingleObject {
let addr = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics);
assert!(!addr.is_zero());

let objref = MockVM::address_to_ref(addr);
let objref = MockVM::object_start_to_ref(addr);
memory_manager::post_alloc(&mut mutator.mutator, objref, size, semantics);

SingleObject { objref, mutator }
Expand All @@ -257,7 +256,6 @@ pub struct TwoObjects {

impl FixtureContent for TwoObjects {
fn create() -> Self {
use crate::vm::object_model::ObjectModel;
let mut mutator = MutatorFixture::create();

let size = 128;
Expand All @@ -266,13 +264,13 @@ impl FixtureContent for TwoObjects {
let addr1 = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics);
assert!(!addr1.is_zero());

let objref1 = MockVM::address_to_ref(addr1);
let objref1 = MockVM::object_start_to_ref(addr1);
memory_manager::post_alloc(&mut mutator.mutator, objref1, size, semantics);

let addr2 = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics);
assert!(!addr2.is_zero());

let objref2 = MockVM::address_to_ref(addr2);
let objref2 = MockVM::object_start_to_ref(addr2);
memory_manager::post_alloc(&mut mutator.mutator, objref2, size, semantics);

TwoObjects {
Expand Down
25 changes: 9 additions & 16 deletions src/util/test_util/mock_vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use std::sync::Mutex;

/// The offset between object reference and the allocation address if we use
/// the default mock VM.
pub const DEFAULT_OBJECT_REF_OFFSET: usize = 4;
pub const DEFAULT_OBJECT_REF_OFFSET: usize = crate::util::constants::BYTES_IN_ADDRESS;

// To mock static methods, we have to create a static instance of `MockVM`.
lazy_static! {
Expand Down Expand Up @@ -231,8 +231,6 @@ pub struct MockVM {
MockMethod<(ObjectReference, Address), ObjectReference>,
pub ref_to_object_start: MockMethod<ObjectReference, Address>,
pub ref_to_header: MockMethod<ObjectReference, Address>,
pub ref_to_address: MockMethod<ObjectReference, Address>,
pub address_to_ref: MockMethod<Address, ObjectReference>,
pub dump_object: MockMethod<ObjectReference, ()>,
// reference glue
pub weakref_clear_referent: MockMethod<ObjectReference, ()>,
Expand Down Expand Up @@ -304,12 +302,6 @@ impl Default for MockVM {
object.to_raw_address().sub(DEFAULT_OBJECT_REF_OFFSET)
})),
ref_to_header: MockMethod::new_fixed(Box::new(|object| object.to_raw_address())),
ref_to_address: MockMethod::new_fixed(Box::new(|object| {
object.to_raw_address().sub(DEFAULT_OBJECT_REF_OFFSET)
})),
address_to_ref: MockMethod::new_fixed(Box::new(|addr| {
ObjectReference::from_raw_address(addr.add(DEFAULT_OBJECT_REF_OFFSET)).unwrap()
})),
dump_object: MockMethod::new_unimplemented(),

weakref_clear_referent: MockMethod::new_unimplemented(),
Expand Down Expand Up @@ -531,13 +523,8 @@ impl crate::vm::ObjectModel<MockVM> for MockVM {
mock!(ref_to_header(object))
}

fn ref_to_address(object: ObjectReference) -> Address {
mock!(ref_to_address(object))
}

fn address_to_ref(addr: Address) -> ObjectReference {
mock!(address_to_ref(addr))
}
// TODO: This is not mocked. We need a way to deal with it.
const IN_OBJECT_ADDRESS_OFFSET: isize = -(DEFAULT_OBJECT_REF_OFFSET as isize);

fn dump_object(object: ObjectReference) {
mock!(dump_object(object))
Expand Down Expand Up @@ -629,3 +616,9 @@ impl crate::vm::Scanning<MockVM> for MockVM {
mock_any!(forward_weak_refs(worker, tracer_context))
}
}

impl MockVM {
    /// Turn an allocation start address into an `ObjectReference` for the default mock VM, by
    /// adding `DEFAULT_OBJECT_REF_OFFSET` to `start`.
    ///
    /// # Panics
    ///
    /// Panics if the resulting address is zero.
    pub fn object_start_to_ref(start: Address) -> ObjectReference {
        let ref_addr = start + DEFAULT_OBJECT_REF_OFFSET;
        ObjectReference::from_raw_address(ref_addr).unwrap()
    }
}
Loading
Loading