Skip to content

Commit

Permalink
Require ObjectReference to point inside object (#1195)
Browse files Browse the repository at this point in the history
Require the raw address of `ObjectReference` to be within the address
range of the object it refers to. The raw address is now used directly
for side metadata access and SFT dispatching. This makes "in-object
address" unnecessary, and we removed the concept of "in-object address"
and related constants and methods.

Methods which use the "in-object address" for SFT dispatching or
side-metadata access used to have a `<VM: VMBinding>` type parameter.
This PR removes that type parameter.

Because `ObjectReference` is now both within an object and word-aligned,
the algorithm for searching for VO bits from internal pointers is
slightly simplified. The method `is_mmtk_object` now has undefined
behavior for arguments that are zero or misaligned because they are
obviously illegal addresses for `ObjectReference`, and the user should
have filtered them out in the first place.

Fixes: #1170
  • Loading branch information
wks committed Sep 6, 2024
1 parent b3385b8 commit 45cdf31
Show file tree
Hide file tree
Showing 44 changed files with 447 additions and 386 deletions.
10 changes: 2 additions & 8 deletions benches/mock_bench/internal_pointer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,7 @@ pub fn bench(c: &mut Criterion) {
);
let obj_end = addr + NORMAL_OBJECT_SIZE;
_b.iter(|| {
memory_manager::find_object_from_internal_pointer::<MockVM>(
obj_end - 1,
NORMAL_OBJECT_SIZE,
);
memory_manager::find_object_from_internal_pointer(obj_end - 1, NORMAL_OBJECT_SIZE);
})
}
#[cfg(not(feature = "is_mmtk_object"))]
Expand Down Expand Up @@ -83,10 +80,7 @@ pub fn bench(c: &mut Criterion) {
);
let obj_end = addr + LARGE_OBJECT_SIZE;
_b.iter(|| {
memory_manager::find_object_from_internal_pointer::<MockVM>(
obj_end - 1,
LARGE_OBJECT_SIZE,
);
memory_manager::find_object_from_internal_pointer(obj_end - 1, LARGE_OBJECT_SIZE);
})
}
#[cfg(not(feature = "is_mmtk_object"))]
Expand Down
2 changes: 1 addition & 1 deletion benches/mock_bench/sft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ pub fn bench(c: &mut Criterion) {
let obj = MockVM::object_start_to_ref(addr);

c.bench_function("sft read", |b| {
b.iter(|| memory_manager::is_in_mmtk_spaces::<MockVM>(black_box(obj)))
b.iter(|| memory_manager::is_in_mmtk_spaces(black_box(obj)))
});
}
8 changes: 4 additions & 4 deletions docs/dummyvm/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,23 +142,23 @@ pub extern "C" fn mmtk_total_bytes() -> usize {

#[no_mangle]
pub extern "C" fn mmtk_is_live_object(object: ObjectReference) -> bool {
memory_manager::is_live_object::<DummyVM>(object)
memory_manager::is_live_object(object)
}

#[no_mangle]
pub extern "C" fn mmtk_will_never_move(object: ObjectReference) -> bool {
!object.is_movable::<DummyVM>()
!object.is_movable()
}

#[cfg(feature = "is_mmtk_object")]
#[no_mangle]
pub extern "C" fn mmtk_is_mmtk_object(addr: Address) -> bool {
memory_manager::is_mmtk_object(addr)
memory_manager::is_mmtk_object(addr).is_some()
}

#[no_mangle]
pub extern "C" fn mmtk_is_in_mmtk_spaces(object: ObjectReference) -> bool {
memory_manager::is_in_mmtk_spaces::<DummyVM>(object)
memory_manager::is_in_mmtk_spaces(object)
}

#[no_mangle]
Expand Down
9 changes: 1 addition & 8 deletions docs/dummyvm/src/object_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,10 @@ use mmtk::vm::*;
pub struct VMObjectModel {}

/// This is the offset from the allocation result to the object reference for the object.
/// For bindings that this offset is not a constant, you can implement the calculation in the method `ref_to_object_start``, and
/// For bindings where this offset is not a constant, you can implement the calculation in the method `ref_to_object_start`, and
/// remove this constant.
pub const OBJECT_REF_OFFSET: usize = 0;

/// This is the offset from the object reference to an in-object address. The binding needs
/// to guarantee the in-object address is inside the storage associated with the object.
/// It has to be a constant offset. See `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`.
pub const IN_OBJECT_ADDRESS_OFFSET: isize = 0;

// This is the offset from the object reference to the object header.
// This value is used in `ref_to_header` where MMTk loads header metadata from.
pub const OBJECT_HEADER_OFFSET: usize = 0;
Expand Down Expand Up @@ -86,8 +81,6 @@ impl ObjectModel<DummyVM> for VMObjectModel {
object.to_raw_address().sub(OBJECT_HEADER_OFFSET)
}

const IN_OBJECT_ADDRESS_OFFSET: isize = IN_OBJECT_ADDRESS_OFFSET;

fn dump_object(_object: ObjectReference) {
unimplemented!()
}
Expand Down
48 changes: 48 additions & 0 deletions docs/userguide/src/migration/prefix.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,54 @@ Notes for the mmtk-core developers:

<!-- Insert new versions here -->

## 0.28.0

### `ObjectReference` must point inside an object

```admonish tldr
`ObjectReference` is now required to be an address within an object. The concept of "in-object
address" and related methods are removed. Some methods which used to depend on the "in-object
address" no longer need the `<VM>` type argument.
```

API changes:

- struct `ObjectReference`
+ Its "raw address" must be within an object now.
+ The following methods which were used to access the in-object address are removed.
* `from_address`
* `to_address`
* When accessing side metadata, the "raw address" should be used, instead.
+ The following methods no longer have the `<VM>` type argument.
* `get_forwarded_object`
* `is_in_any_space`
* `is_live`
* `is_movable`
* `is_reachable`
- module `memory_manager`
+ `is_mmtk_object`: It now requires the address parameter to be non-zero and word-aligned.
* Otherwise it will not be a legal `ObjectReference` in the first place. The user should
filter out such illegal values.
+ The following functions no longer have the `<VM>` type argument.
* `find_object_from_internal_pointer`
* `is_in_mmtk_spaces`
* `is_live_object`
* `is_pinned`
* `pin_object`
* `unpin_object`
- struct `Region`
+ The following methods no longer have the `<VM>` type argument.
* `containing`
- trait `ObjectModel`
+ `IN_OBJECT_ADDRESS_OFFSET`: removed because it is no longer needed.

See also:

- PR: <https://github.com/mmtk/mmtk-core/pull/1195>
- Examples:
+ https://github.com/mmtk/mmtk-openjdk/pull/286: a simple case
+ https://github.com/mmtk/mmtk-jikesrvm/issues/178: a VM that needs much change for this

## 0.27.0

### `is_mmtk_object` returns `Option<ObjectReference>`
Expand Down
54 changes: 44 additions & 10 deletions docs/userguide/src/portingguide/howto/nogc.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,39 @@ We recommend going through the [list of metadata specifications](https://docs.mm

#### `ObjectReference` vs `Address`

A key principle in MMTk is the distinction between [`ObjectReference`](https://docs.mmtk.io/api/mmtk/util/address/struct.ObjectReference.html) and [`Address`](https://docs.mmtk.io/api/mmtk/util/address/struct.Address.html). The idea is that very few operations are allowed on an `ObjectReference`. For example, MMTk does not allow address arithmetic on `ObjectReference`s. This allows us to preserve memory-safety, only performing unsafe operations when required, and gives us a cleaner and more flexible abstraction to work with as it can allow object handles or offsets etc. `Address`, on the other hand, represents an arbitrary machine address. You might be interested in reading the *Demystifying Magic: High-level Low-level Programming* paper[^3] which describes the above in more detail.

In MMTk, `ObjectReference` is a special address that represents an object. A binding may use tagged references, compressed pointers, etc.
They need to deal with the encoding and the decoding in their [`Slot`](https://docs.mmtk.io/api/mmtk/vm/slot/trait.Slot.html) implementation,
and always present plain `ObjectReference`s to MMTk. See [this test](https://github.com/mmtk/mmtk-core/blob/master/src/vm/tests/mock_tests/mock_test_slots.rs) for some `Slot` implementation examples.

[^3]: https://users.cecs.anu.edu.au/~steveb/pubs/papers/vmmagic-vee-2009.pdf
A key principle in MMTk is the distinction between [`ObjectReference`](https://docs.mmtk.io/api/mmtk/util/address/struct.ObjectReference.html) and [`Address`](https://docs.mmtk.io/api/mmtk/util/address/struct.Address.html). The idea is that very few operations are allowed on an `ObjectReference`. For example, MMTk does not allow address arithmetic on `ObjectReference`s. This allows us to preserve memory-safety, only performing unsafe operations when required, and gives us a cleaner and more flexible abstraction to work with as it can allow object handles or offsets etc. `Address`, on the other hand, represents an arbitrary machine address. You might be interested in reading the [*Demystifying Magic: High-level Low-level Programming*][FBC09] paper which describes the above in more detail.

In MMTk, `ObjectReference` is a special address that represents an object. It is required to be
within the address range of the object it refers to, and must be word-aligned. This address is used
by MMTk to access side metadata, and find the space or regions (chunk, block, line, etc.) that
contain the object. It must also be efficient to locate the object header (where in-header MMTk
metadata are held) and the object's VM-specific metadata, such as type information, from a given
`ObjectReference`. MMTk will need to access that information, either directly or indirectly via
traits implemented by the binding, during tracing, which is performance-critical.

The address used as `ObjectReference` is nominated by the VM binding when an object is allocated (or
moved by a moving GC, which we can ignore for now when supporting NoGC). VMs usually have their own
concepts of "object reference" which refer to objects. Some of them, including OpenJDK and CRuby,
use addresses to the object (the starting address or at an offset within the object) to refer to an
object. Such VMs can directly use their "object reference" for the address of MMTk's
`ObjectReference`.

Some VMs, such as JikesRVM, refer to an object by an address at a constant offset after the header,
which can be outside the object. This does not satisfy the requirement of MMTk's `ObjectReference`,
and the VM binding needs to make a clear distinction between the VM-level object reference and
MMTk's `ObjectReference` type. A detailed example for supporting such a VM can be found
[here][jikesrvm-objref].

Other VMs may use tagged references, compressed pointers, etc. They need to convert them to plain
addresses to be used as MMTk's `ObjectReference`. Specifically, if the VM uses such representations
in object fields, the VM binding can deal with the encoding and the decoding in its
[`Slot`][slot-trait] implementation, and always present plain `ObjectReference`s to MMTk. See [this
test][slot-test] for some `Slot` implementation examples.

[FBC09]: https://users.cecs.anu.edu.au/~steveb/pubs/papers/vmmagic-vee-2009.pdf
[jikesrvm-objref]: https://github.com/mmtk/mmtk-jikesrvm/issues/178
[slot-trait]: https://docs.mmtk.io/api/mmtk/vm/slot/trait.Slot.html
[slot-test]: https://github.com/mmtk/mmtk-core/blob/master/src/vm/tests/mock_tests/mock_test_slots.rs

#### Miscellaneous configuration options

Expand Down Expand Up @@ -261,7 +287,7 @@ void *mmtk_alloc(MmtkMutator mutator, size_t size, size_t align,
* Set relevant object metadata
*
* @param mutator the mutator instance that is requesting the allocation
* @param object the returned address of the allocated object
* @param object the ObjectReference address chosen by the VM binding
* @param size the size of the allocated object
* @param allocator the allocation semantics to use for the allocation
*/
Expand All @@ -274,13 +300,21 @@ In order to perform allocations, you will need to know what object alignment the

Now that MMTk is aware of each mutator thread, you have to change the runtime's allocation functions to call into MMTk to allocate using `mmtk_alloc` and set object metadata using `mmtk_post_alloc`. Note that there may be multiple allocation functions in the runtime so make sure that you edit them all!

You should use the saved `Mutator` pointer as the first parameter, the requested object size as the next parameter, and any alignment requirements the runtimes has as the third parameter.
When calling `mmtk_alloc`, you should use the saved `Mutator` pointer as the first parameter, the requested object size as the next parameter, and any alignment requirements the runtime has as the third parameter.

If your runtime requires a non-zero allocation offset (i.e. the alignment requirements are for the offset address, not the returned address) then you have to provide the required value as the fourth parameter. Note that you ***must*** also update the [`USE_ALLOCATION_OFFSET`](https://docs.mmtk.io/api/mmtk/vm/trait.VMBinding.html#associatedconstant.USE_ALLOCATION_OFFSET) constant in the `VMBinding` implementation if your runtime requires a non-zero allocation offset.

For the time-being, you can ignore the `allocator` parameter in both these functions and always pass a value of `0` which means MMTk will pick the default allocator for your collector (a bump pointer allocator in the case of NoGC).

Finally, you need to call `mmtk_post_alloc` with the object address returned from the previous `mmtk_alloc` call in order to initialize object metadata.
The return value of `mmtk_alloc` is the starting address of the allocated object.

Then you should nominate a word-aligned address within the allocated bytes to be the
`ObjectReference` used to refer to that object from now on. It doesn't have to be the starting
address.

Finally, you need to call `mmtk_post_alloc` with your chosen `ObjectReference` in order to
initialize MMTk-level object metadata, such as logging bits, valid-object (VO) bits, etc. As a VM
binding developer, you can ignore the details for now.

**Note:** Currently MMTk assumes object sizes are multiples of the `MIN_ALIGNMENT`. If you encounter errors with alignment, a simple workaround would be to align the requested object size up to the `MIN_ALIGNMENT`. See [here](https://github.com/mmtk/mmtk-core/issues/730) for the tracking issue to fix this bug.

Expand Down
50 changes: 27 additions & 23 deletions src/memory_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,16 +579,17 @@ pub fn handle_user_collection_request<VM: VMBinding>(mmtk: &MMTK<VM>, tls: VMMut
///
/// Arguments:
/// * `object`: The object reference to query.
pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
object.is_live::<VM>()
pub fn is_live_object(object: ObjectReference) -> bool {
object.is_live()
}

/// Check if `addr` is the address of an object reference to an MMTk object.
/// Check if `addr` is the raw address of an object reference to an MMTk object.
///
/// Concretely:
/// 1. Return true if `ObjectReference::from_raw_address(addr)` is a valid object reference to an
/// object in any space in MMTk.
/// 2. Return false otherwise.
/// 1. Return `Some(object)` if `ObjectReference::from_raw_address(addr)` is a valid object
/// reference to an object in any space in MMTk. `object` is the result of
/// `ObjectReference::from_raw_address(addr)`.
/// 2. Return `None` otherwise.
///
/// This function is useful for conservative root scanning. The VM can iterate through all words in
/// a stack, filter out zeros, misaligned words, obviously out-of-range words (such as addresses
Expand All @@ -603,7 +604,9 @@ pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
/// is present. See `crate::plan::global::BasePlan::vm_space`.
///
/// Argument:
/// * `addr`: An arbitrary address.
/// * `addr`: A non-zero word-aligned address. Because the raw address of an `ObjectReference`
/// cannot be zero and must be word-aligned, the caller must filter out zero and misaligned
/// addresses before calling this function. Otherwise the behavior is undefined.
#[cfg(feature = "is_mmtk_object")]
pub fn is_mmtk_object(addr: Address) -> Option<ObjectReference> {
crate::util::is_mmtk_object::check_object_reference(addr)
Expand All @@ -613,12 +616,13 @@ pub fn is_mmtk_object(addr: Address) -> Option<ObjectReference> {
/// This should be used instead of [`crate::memory_manager::is_mmtk_object`] for conservative stack scanning if
/// the binding may have internal pointers on the stack.
///
/// Note that, we only consider pointers that point to addresses that are equal or greater than the in-object addresss
/// (i.e. [`crate::util::ObjectReference::to_address()`] which is the same as `object_ref.to_raw_address() + ObjectModel::IN_OBJECT_ADDRESS_OFFSET`),
/// and within the allocation as 'internal pointers'. To be precise, for each object ref `obj_ref`, internal pointers are in the range
/// `[obj_ref + ObjectModel::IN_OBJECT_ADDRESS_OFFSET, ObjectModel::ref_to_object_start(obj_ref) + ObjectModel::get_current_size(obj_ref))`.
/// If a binding defines internal pointers differently, calling this method is undefined behavior.
/// If this is the case for you, please submit an issue or engage us on Zulip to discuss more.
/// Note that, we only consider pointers that point to addresses that are equal to or greater than
/// the raw address of the object's `ObjectReference`, and within the allocation as 'internal
/// pointers'. To be precise, for each object ref `obj_ref`, internal pointers are in the range
/// `[obj_ref.to_raw_address(), obj_ref.to_object_start() +
/// ObjectModel::get_current_size(obj_ref))`. If a binding defines internal pointers differently,
/// calling this method is undefined behavior. If this is the case for you, please submit an issue
/// or engage us on Zulip to discuss more.
///
/// Note that, in the similar situation as [`crate::memory_manager::is_mmtk_object`], the binding should filter
/// out obvious non-pointers (e.g. alignment check, bound check, etc) before calling this function to avoid unnecessary
Expand All @@ -633,7 +637,7 @@ pub fn is_mmtk_object(addr: Address) -> Option<ObjectReference> {
/// * `internal_ptr`: The address to start searching. We search backwards from this address (including this address) to find the base reference.
/// * `max_search_bytes`: The maximum number of bytes we may search for an object with VO bit set. `internal_ptr - max_search_bytes` is not included.
#[cfg(feature = "is_mmtk_object")]
pub fn find_object_from_internal_pointer<VM: VMBinding>(
pub fn find_object_from_internal_pointer(
internal_ptr: Address,
max_search_bytes: usize,
) -> Option<ObjectReference> {
Expand All @@ -655,7 +659,7 @@ pub fn find_object_from_internal_pointer<VM: VMBinding>(
/// object for the VM in response to `memory_manager::alloc`, this function will return true; but
/// if the VM directly called `malloc` to allocate the object, this function will return false.
///
/// If `is_mmtk_object(object.to_address())` returns true, `is_in_mmtk_spaces(object)` must also
/// If `is_mmtk_object(object.to_raw_address())` returns true, `is_in_mmtk_spaces(object)` must also
/// return true.
///
/// This function is useful if an object reference in the VM can be either a pointer into the MMTk
Expand All @@ -669,10 +673,10 @@ pub fn find_object_from_internal_pointer<VM: VMBinding>(
///
/// Arguments:
/// * `object`: The object reference to query.
pub fn is_in_mmtk_spaces<VM: VMBinding>(object: ObjectReference) -> bool {
pub fn is_in_mmtk_spaces(object: ObjectReference) -> bool {
use crate::mmtk::SFT_MAP;
SFT_MAP
.get_checked(object.to_address::<VM>())
.get_checked(object.to_raw_address())
.is_in_space(object)
}

Expand Down Expand Up @@ -766,10 +770,10 @@ pub fn add_finalizer<VM: VMBinding>(
/// Arguments:
/// * `object`: The object to be pinned
#[cfg(feature = "object_pinning")]
pub fn pin_object<VM: VMBinding>(object: ObjectReference) -> bool {
pub fn pin_object(object: ObjectReference) -> bool {
use crate::mmtk::SFT_MAP;
SFT_MAP
.get_checked(object.to_address::<VM>())
.get_checked(object.to_raw_address())
.pin_object(object)
}

Expand All @@ -780,10 +784,10 @@ pub fn pin_object<VM: VMBinding>(object: ObjectReference) -> bool {
/// Arguments:
/// * `object`: The object to be unpinned
#[cfg(feature = "object_pinning")]
pub fn unpin_object<VM: VMBinding>(object: ObjectReference) -> bool {
pub fn unpin_object(object: ObjectReference) -> bool {
use crate::mmtk::SFT_MAP;
SFT_MAP
.get_checked(object.to_address::<VM>())
.get_checked(object.to_raw_address())
.unpin_object(object)
}

Expand All @@ -792,10 +796,10 @@ pub fn unpin_object<VM: VMBinding>(object: ObjectReference) -> bool {
/// Arguments:
/// * `object`: The object to be checked
#[cfg(feature = "object_pinning")]
pub fn is_pinned<VM: VMBinding>(object: ObjectReference) -> bool {
pub fn is_pinned(object: ObjectReference) -> bool {
use crate::mmtk::SFT_MAP;
SFT_MAP
.get_checked(object.to_address::<VM>())
.get_checked(object.to_raw_address())
.is_object_pinned(object)
}

Expand Down
2 changes: 1 addition & 1 deletion src/plan/barriers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ impl<S: BarrierSemantics> ObjectBarrier<S> {
fn log_object(&self, object: ObjectReference) -> bool {
#[cfg(all(feature = "vo_bit", feature = "extreme_assertions"))]
debug_assert!(
crate::util::metadata::vo_bit::is_vo_bit_set::<S::VM>(object),
crate::util::metadata::vo_bit::is_vo_bit_set(object),
"object bit is unset"
);
loop {
Expand Down
Loading

0 comments on commit 45cdf31

Please sign in to comment.