diff --git a/benches/main.rs b/benches/main.rs index b759bebeaf..6c735ce4e2 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -27,6 +27,7 @@ pub fn bench_main(_c: &mut Criterion) { match std::env::var("MMTK_BENCH") { Ok(bench) => match bench.as_str() { "alloc" => mock_bench::alloc::bench(_c), + "internal_pointer" => mock_bench::internal_pointer::bench(_c), "sft" => mock_bench::sft::bench(_c), _ => panic!("Unknown benchmark {:?}", bench), }, diff --git a/benches/mock_bench/internal_pointer.rs b/benches/mock_bench/internal_pointer.rs new file mode 100644 index 0000000000..fee21856d3 --- /dev/null +++ b/benches/mock_bench/internal_pointer.rs @@ -0,0 +1,95 @@ +use criterion::Criterion; + +#[cfg(feature = "is_mmtk_object")] +use mmtk::util::test_util::fixtures::*; +use mmtk::util::test_util::mock_method::*; +use mmtk::util::test_util::mock_vm::{write_mockvm, MockVM}; + +pub fn bench(c: &mut Criterion) { + // Setting a larger heap, although the GC should be disabled in the MockVM + #[cfg(feature = "is_mmtk_object")] + let mut fixture = MutatorFixture::create_with_heapsize(1 << 30); + + // Normal objects + // 16KB object -- we want to make sure the object can fit into any normal space (e.g. immix space or mark sweep space) + const NORMAL_OBJECT_SIZE: usize = 16 * 1024; + write_mockvm(|mock| { + *mock = MockVM { + get_object_size: MockMethod::new_fixed(Box::new(|_| NORMAL_OBJECT_SIZE)), + is_collection_enabled: MockMethod::new_fixed(Box::new(|_| false)), + ..MockVM::default() + } + }); + + c.bench_function("internal pointer - normal objects", |_b| { + #[cfg(feature = "is_mmtk_object")] + { + use mmtk::memory_manager; + use mmtk::AllocationSemantics; + let addr = memory_manager::alloc( + &mut fixture.mutator, + NORMAL_OBJECT_SIZE, + 8, + 0, + AllocationSemantics::Default, + ); + let obj_ref = MockVM::object_start_to_ref(addr); + memory_manager::post_alloc( + &mut fixture.mutator, + obj_ref, + NORMAL_OBJECT_SIZE, + AllocationSemantics::Default, + ); + let obj_end = addr + NORMAL_OBJECT_SIZE; + _b.iter(|| { + memory_manager::find_object_from_internal_pointer::( + obj_end - 1, + NORMAL_OBJECT_SIZE, + ); + }) + } + #[cfg(not(feature = "is_mmtk_object"))] + panic!("The benchmark requires is_mmtk_object feature to run"); + }); + + // Large objects + // 16KB object + const LARGE_OBJECT_SIZE: usize = 16 * 1024; + write_mockvm(|mock| { + *mock = MockVM { + get_object_size: MockMethod::new_fixed(Box::new(|_| LARGE_OBJECT_SIZE)), + is_collection_enabled: MockMethod::new_fixed(Box::new(|_| false)), + ..MockVM::default() + } + }); + c.bench_function("internal pointer - large objects", |_b| { + #[cfg(feature = "is_mmtk_object")] + { + use mmtk::memory_manager; + use mmtk::AllocationSemantics; + let addr = memory_manager::alloc( + &mut fixture.mutator, + LARGE_OBJECT_SIZE, + 8, + 0, + AllocationSemantics::Los, + ); + let obj_ref = MockVM::object_start_to_ref(addr); + memory_manager::post_alloc( + &mut fixture.mutator, + obj_ref, + LARGE_OBJECT_SIZE, + AllocationSemantics::Los, + ); + let obj_end = addr + LARGE_OBJECT_SIZE; + _b.iter(|| { + memory_manager::find_object_from_internal_pointer::( + obj_end - 1, + LARGE_OBJECT_SIZE, + ); + }) + } + #[cfg(not(feature = "is_mmtk_object"))] + panic!("The benchmark requires is_mmtk_object feature to run"); + }); +} diff --git a/benches/mock_bench/mod.rs b/benches/mock_bench/mod.rs index 532ed23009..f4ca1c4428 100644 --- a/benches/mock_bench/mod.rs +++ b/benches/mock_bench/mod.rs @@ -1,2 +1,3 @@ pub mod alloc; +pub mod internal_pointer; pub mod sft; diff --git 
a/docs/userguide/src/migration/prefix.md b/docs/userguide/src/migration/prefix.md index 05fa6f91d9..c6765bd779 100644 --- a/docs/userguide/src/migration/prefix.md +++ b/docs/userguide/src/migration/prefix.md @@ -32,6 +32,22 @@ Notes for the mmtk-core developers: ## 0.27.0 +### `is_mmtk_object` returns `Option<ObjectReference>` + +```admonish tldr +`memory_manager::is_mmtk_object` now returns `Option<ObjectReference>` instead of `bool`. +Bindings can use the returned object reference directly instead of computing it on the binding side. +``` + +API changes: +* module `memory_manager` + - `is_mmtk_object` now returns `Option<ObjectReference>`. + +See also: + +- PR: +- Example: + ### Introduce `ObjectModel::IN_OBJECT_ADDRESS_OFFSET` ```admonish tldr @@ -49,6 +65,11 @@ API changes: - Add a constant `ALIGNMENT` which equals the word size. All object references should be at least aligned to the word size. This is checked in debug builds when an `ObjectReference` is constructed. +See also: + +- PR: +- Example: + ## 0.26.0 ### Rename "edge" to "slot" diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 7b3a495fa3..c05f3432eb 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -588,40 +588,56 @@ pub fn is_live_object(object: ObjectReference) -> bool { /// Concretely: /// 1. Return true if `ObjectReference::from_raw_address(addr)` is a valid object reference to an /// object in any space in MMTk. -/// 2. Also return true if there exists an `objref: ObjectReference` such that -/// - `objref` is a valid object reference to an object in any space in MMTk, and -/// - `lo <= objref.to_address() < hi`, where -/// - `lo = addr.align_down(VO_BIT_REGION_SIZE)` and -/// - `hi = lo + VO_BIT_REGION_SIZE` and -/// - `VO_BIT_REGION_SIZE` is [`crate::util::is_mmtk_object::VO_BIT_REGION_SIZE`]. -/// It is the byte granularity of the valid object (VO) bit. -/// 3. Return false otherwise. This function never panics. -/// -/// This function uses the "valid object (VO) bits" side metadata, i.e. a bitmap. -/// For space efficiency, each bit of the bitmap governs a small region of memory. -/// The size of a region is currently defined as the [minimum object size](crate::util::constants::MIN_OBJECT_SIZE), -/// which is currently defined as the [word size](crate::util::constants::BYTES_IN_WORD), -/// which is 4 bytes on 32-bit systems or 8 bytes on 64-bit systems. -/// The alignment of a region is also the region size. -/// If a VO bit is `1`, the bitmap cannot tell which address within the 4-byte or 8-byte region -/// is the valid object reference. -/// Therefore, if this method returns true, the binding can compute the object reference by -/// aligning the address to [`crate::util::ObjectReference::ALIGNMENT`]. +/// 2. Return false otherwise. /// /// This function is useful for conservative root scanning. The VM can iterate through all words in /// a stack, filter out zeros, misaligned words, obviously out-of-range words (such as addresses /// greater than `0x0000_7fff_ffff_ffff` on Linux on x86_64), and use this function to decide if the /// word is really a reference. /// +/// This function does not handle internal pointers. If a binding may have internal pointers on +/// the stack and needs to identify the base reference for an internal pointer, it should use +/// [`find_object_from_internal_pointer`] instead. +/// /// Note: This function has special behaviors if the VM space (enabled by the `vm_space` feature) /// is present. See `crate::plan::global::BasePlan::vm_space`. /// /// Argument: /// * `addr`: An arbitrary address.
#[cfg(feature = "is_mmtk_object")] -pub fn is_mmtk_object(addr: Address) -> bool { - use crate::mmtk::SFT_MAP; - SFT_MAP.get_checked(addr).is_mmtk_object(addr) +pub fn is_mmtk_object(addr: Address) -> Option<ObjectReference> { + crate::util::is_mmtk_object::check_object_reference(addr) +} + +/// Find whether there is an object with the VO bit set in the given address range, and return it if so. +/// This should be used instead of [`crate::memory_manager::is_mmtk_object`] for conservative stack scanning if +/// the binding may have internal pointers on the stack. +/// +/// Note that we only consider pointers that point to addresses equal to or greater than the in-object address +/// (i.e. [`crate::util::ObjectReference::to_address()`], which is the same as `object_ref.to_raw_address() + ObjectModel::IN_OBJECT_ADDRESS_OFFSET`), +/// and within the allocation, as 'internal pointers'. To be precise, for each object ref `obj_ref`, internal pointers are in the range +/// `[obj_ref + ObjectModel::IN_OBJECT_ADDRESS_OFFSET, ObjectModel::ref_to_object_start(obj_ref) + ObjectModel::get_current_size(obj_ref))`. +/// If a binding defines internal pointers differently, calling this method is undefined behavior. +/// If this is the case for you, please submit an issue or engage us on Zulip to discuss it further. +/// +/// As with [`crate::memory_manager::is_mmtk_object`], the binding should filter +/// out obvious non-pointers (e.g. by alignment and bounds checks) before calling this function to avoid unnecessary +/// cost. This method is not cheap. +/// +/// To minimize the cost, the user should also use a small `max_search_bytes`. +/// +/// Note: This function has special behaviors if the VM space (enabled by the `vm_space` feature) +/// is present. See `crate::plan::global::BasePlan::vm_space`. +/// +/// Arguments: +/// * `internal_ptr`: The address to start searching from. We search backwards from this address (including this address) to find the base reference. +/// * `max_search_bytes`: The maximum number of bytes we may search for an object with the VO bit set. `internal_ptr - max_search_bytes` is not included. +#[cfg(feature = "is_mmtk_object")] +pub fn find_object_from_internal_pointer<VM: VMBinding>( + internal_ptr: Address, + max_search_bytes: usize, +) -> Option<ObjectReference> { + crate::util::is_mmtk_object::check_internal_reference(internal_ptr, max_search_bytes) } /// Return true if the `object` lies in a region of memory where diff --git a/src/plan/global.rs b/src/plan/global.rs index efdffa4084..90b7452c40 100644 --- a/src/plan/global.rs +++ b/src/plan/global.rs @@ -355,9 +355,10 @@ pub struct BasePlan { /// If VM space is present, it has some special interaction with the /// `memory_manager::is_mmtk_object` and the `memory_manager::is_in_mmtk_spaces` functions. /// - /// - The `is_mmtk_object` funciton requires the valid object (VO) bit side metadata to identify objects, - /// but currently we do not require the boot image to provide it, so it will not work if the - /// address argument is in the VM space. + /// - The functions `is_mmtk_object` and `find_object_from_internal_pointer` require + /// the valid object (VO) bit side metadata to identify objects. + /// If the binding maintains the VO bit for objects in VM spaces, those functions will work accordingly. + /// Otherwise, calling them is undefined behavior. /// /// - The `is_in_mmtk_spaces` currently returns `true` if the given object reference is in /// the VM space.
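For illustration, here is a minimal binding-side sketch of how the two functions above might be used together during conservative stack scanning (not part of this diff; it assumes the `is_mmtk_object` feature is enabled, `VM` is the binding's `VMBinding` type, and `MAX_SEARCH_BYTES` is a hypothetical binding-chosen bound on the backwards search):

```rust
use mmtk::memory_manager;
use mmtk::util::{Address, ObjectReference};
use mmtk::vm::VMBinding;

/// Filter one word found on a mutator stack and return the object it
/// (possibly conservatively) refers to, if any.
fn find_conservative_root<VM: VMBinding>(word: Address) -> Option<ObjectReference> {
    // Cheap filters first: reject obvious non-pointers before touching the
    // VO-bit side metadata, as the documentation above recommends.
    if word.is_zero() {
        return None;
    }
    // An aligned word may be an exact object reference. `is_mmtk_object` now
    // hands back the reference directly instead of returning `bool`.
    if word.is_aligned_to(ObjectReference::ALIGNMENT) {
        if let Some(object) = memory_manager::is_mmtk_object(word) {
            return Some(object);
        }
    }
    // Otherwise treat the word as a potential internal pointer and search
    // backwards for the base reference. The bound is a binding-side choice
    // (e.g. the largest object the VM ever allocates); smaller bounds make
    // the search cheaper.
    const MAX_SEARCH_BYTES: usize = 1 << 20; // hypothetical bound
    memory_manager::find_object_from_internal_pointer::<VM>(word, MAX_SEARCH_BYTES)
}
```

A binding that never creates internal pointers can keep using `is_mmtk_object` alone; the backwards search is only worth its cost when interior pointers can actually appear on the stack.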
diff --git a/src/policy/copyspace.rs b/src/policy/copyspace.rs index f4b5b950fa..bf73255033 100644 --- a/src/policy/copyspace.rs +++ b/src/policy/copyspace.rs @@ -74,8 +74,20 @@ impl SFT for CopySpace { } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { - crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr).is_some() + fn is_mmtk_object(&self, addr: Address) -> Option { + crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr) + } + + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + crate::util::metadata::vo_bit::find_object_from_internal_pointer::( + ptr, + max_search_bytes, + ) } fn sft_trace_object( diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index e56761559d..547ac6d794 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -141,8 +141,18 @@ impl SFT for ImmixSpace { crate::util::metadata::vo_bit::set_vo_bit::(_object); } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { - crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr).is_some() + fn is_mmtk_object(&self, addr: Address) -> Option { + crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr) + } + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + // We don't need to search more than the max object size in the immix space. + let search_bytes = usize::min(super::MAX_IMMIX_OBJECT_SIZE, max_search_bytes); + crate::util::metadata::vo_bit::find_object_from_internal_pointer::(ptr, search_bytes) } fn sft_trace_object( &self, diff --git a/src/policy/immortalspace.rs b/src/policy/immortalspace.rs index bdd2c3b660..6d3e63922d 100644 --- a/src/policy/immortalspace.rs +++ b/src/policy/immortalspace.rs @@ -64,8 +64,19 @@ impl SFT for ImmortalSpace { crate::util::metadata::vo_bit::set_vo_bit::(object); } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { - crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr).is_some() + fn is_mmtk_object(&self, addr: Address) -> Option { + crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr) + } + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + crate::util::metadata::vo_bit::find_object_from_internal_pointer::( + ptr, + max_search_bytes, + ) } fn sft_trace_object( &self, diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index dd14db3b61..b64a5371c6 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -80,11 +80,65 @@ impl SFT for LargeObjectSpace { #[cfg(feature = "vo_bit")] crate::util::metadata::vo_bit::set_vo_bit::(object); + #[cfg(all(feature = "is_mmtk_object", debug_assertions))] + { + use crate::util::constants::LOG_BYTES_IN_PAGE; + let vo_addr = object.to_address::(); + let offset_from_page_start = vo_addr & ((1 << LOG_BYTES_IN_PAGE) - 1) as usize; + debug_assert!( + offset_from_page_start < crate::util::metadata::vo_bit::VO_BIT_WORD_TO_REGION, + "The in-object address is not in the first 512 bytes of a page. The internal pointer searching for LOS won't work." 
+ ); + } + self.treadmill.add_to_treadmill(object, alloc); } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { - crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr).is_some() + fn is_mmtk_object(&self, addr: Address) -> Option { + crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr) + } + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + use crate::util::metadata::vo_bit; + // For large object space, it is a bit special. We only need to check VO bit for each page. + let mut cur_page = ptr.align_down(BYTES_IN_PAGE); + let low_page = ptr + .saturating_sub(max_search_bytes) + .align_down(BYTES_IN_PAGE); + while cur_page >= low_page { + // If the page start is not mapped, there can't be an object in it. + if !cur_page.is_mapped() { + return None; + } + // For performance, we only check the first word which maps to the first 512 bytes in the page. + // In almost all the cases, it should be sufficient. + // However, if the in-object address is not in the first 512 bytes, this won't work. + // We assert this when we set VO bit for LOS. + if vo_bit::get_raw_vo_bit_word(cur_page) != 0 { + // Find the exact address that has vo bit set + for offset in 0..vo_bit::VO_BIT_WORD_TO_REGION { + let addr = cur_page + offset; + if unsafe { vo_bit::is_vo_addr(addr) } { + let obj = vo_bit::is_internal_ptr_from_vo_bit::(addr, ptr); + if obj.is_some() { + return obj; + } else { + return None; + } + } + } + unreachable!( + "We found vo bit in the raw word, but we cannot find the exact address" + ); + } + + cur_page -= BYTES_IN_PAGE; + } + None } fn sft_trace_object( &self, diff --git a/src/policy/lockfreeimmortalspace.rs b/src/policy/lockfreeimmortalspace.rs index 271b5613db..858d07fd5f 100644 --- a/src/policy/lockfreeimmortalspace.rs +++ b/src/policy/lockfreeimmortalspace.rs @@ -74,8 +74,19 @@ impl SFT for LockFreeImmortalSpace { crate::util::metadata::vo_bit::set_vo_bit::(_object); } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { - crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr).is_some() + fn is_mmtk_object(&self, addr: Address) -> Option { + crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr) + } + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + crate::util::metadata::vo_bit::find_object_from_internal_pointer::( + ptr, + max_search_bytes, + ) } fn sft_trace_object( &self, diff --git a/src/policy/markcompactspace.rs b/src/policy/markcompactspace.rs index b40af105dd..9db05e56aa 100644 --- a/src/policy/markcompactspace.rs +++ b/src/policy/markcompactspace.rs @@ -75,8 +75,20 @@ impl SFT for MarkCompactSpace { } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { - crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr).is_some() + fn is_mmtk_object(&self, addr: Address) -> Option { + crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr) + } + + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + crate::util::metadata::vo_bit::find_object_from_internal_pointer::( + ptr, + max_search_bytes, + ) } fn sft_trace_object( diff --git a/src/policy/marksweepspace/malloc_ms/global.rs b/src/policy/marksweepspace/malloc_ms/global.rs index 02065ea3e2..d95aa8437b 100644 --- 
a/src/policy/marksweepspace/malloc_ms/global.rs +++ b/src/policy/marksweepspace/malloc_ms/global.rs @@ -102,11 +102,23 @@ impl SFT for MallocSpace { /// For malloc space, we just use the side metadata. #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { + fn is_mmtk_object(&self, addr: Address) -> Option { debug_assert!(!addr.is_zero()); // `addr` cannot be mapped by us. It should be mapped by the malloc library. debug_assert!(!addr.is_mapped()); - has_object_alloced_by_malloc::(addr).is_some() + has_object_alloced_by_malloc::(addr) + } + + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + crate::util::metadata::vo_bit::find_object_from_internal_pointer::( + ptr, + max_search_bytes, + ) } fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { diff --git a/src/policy/marksweepspace/native_ms/global.rs b/src/policy/marksweepspace/native_ms/global.rs index 30c7629905..41773be244 100644 --- a/src/policy/marksweepspace/native_ms/global.rs +++ b/src/policy/marksweepspace/native_ms/global.rs @@ -193,8 +193,19 @@ impl SFT for MarkSweepSpace { } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { - crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr).is_some() + fn is_mmtk_object(&self, addr: Address) -> Option { + crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr) + } + + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + // We don't need to search more than the max object size in the mark sweep space. + let search_bytes = usize::min(MAX_OBJECT_SIZE, max_search_bytes); + crate::util::metadata::vo_bit::find_object_from_internal_pointer::(ptr, search_bytes) } fn sft_trace_object( diff --git a/src/policy/sft.rs b/src/policy/sft.rs index 09729fbcc4..cab4cc987a 100644 --- a/src/policy/sft.rs +++ b/src/policy/sft.rs @@ -76,7 +76,14 @@ pub trait SFT { /// Some spaces, like `MallocSpace`, use third-party libraries to allocate memory. /// Such spaces needs to override this method. #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool; + fn is_mmtk_object(&self, addr: Address) -> Option; + + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option; /// Initialize object metadata (in the header, or in the side metadata). 
fn initialize_object_metadata(&self, object: ObjectReference, alloc: bool); @@ -154,8 +161,16 @@ impl SFT for EmptySpaceSFT { false } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, _addr: Address) -> bool { - false + fn is_mmtk_object(&self, _addr: Address) -> Option { + None + } + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + _ptr: Address, + _max_search_bytes: usize, + ) -> Option { + None } fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { diff --git a/src/policy/vmspace.rs b/src/policy/vmspace.rs index 8a9c070bd1..ddb4b7881e 100644 --- a/src/policy/vmspace.rs +++ b/src/policy/vmspace.rs @@ -64,8 +64,19 @@ impl SFT for VMSpace { crate::util::metadata::vo_bit::set_vo_bit::(object); } #[cfg(feature = "is_mmtk_object")] - fn is_mmtk_object(&self, addr: Address) -> bool { - crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr).is_some() + fn is_mmtk_object(&self, addr: Address) -> Option { + crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::(addr) + } + #[cfg(feature = "is_mmtk_object")] + fn find_object_from_internal_pointer( + &self, + ptr: Address, + max_search_bytes: usize, + ) -> Option { + crate::util::metadata::vo_bit::find_object_from_internal_pointer::( + ptr, + max_search_bytes, + ) } fn sft_trace_object( &self, diff --git a/src/util/is_mmtk_object.rs b/src/util/is_mmtk_object.rs index 05d85472c5..6e3e4bac2a 100644 --- a/src/util/is_mmtk_object.rs +++ b/src/util/is_mmtk_object.rs @@ -2,3 +2,26 @@ /// The VM can use this to check if an object is properly aligned. pub const VO_BIT_REGION_SIZE: usize = 1usize << crate::util::metadata::vo_bit::VO_BIT_SIDE_METADATA_SPEC.log_bytes_in_region; + +use crate::util::{Address, ObjectReference}; + +pub(crate) fn check_object_reference(addr: Address) -> Option { + use crate::mmtk::SFT_MAP; + SFT_MAP.get_checked(addr).is_mmtk_object(addr) +} + +pub(crate) fn check_internal_reference( + addr: Address, + max_search_bytes: usize, +) -> Option { + use crate::mmtk::SFT_MAP; + let ret = SFT_MAP + .get_checked(addr) + .find_object_from_internal_pointer(addr, max_search_bytes); + #[cfg(debug_assertions)] + if let Some(obj) = ret { + let obj = check_object_reference(obj.to_raw_address()); + debug_assert_eq!(obj, ret); + } + ret +} diff --git a/src/util/metadata/side_metadata/global.rs b/src/util/metadata/side_metadata/global.rs index cc1a738640..123698aa6e 100644 --- a/src/util/metadata/side_metadata/global.rs +++ b/src/util/metadata/side_metadata/global.rs @@ -34,11 +34,16 @@ pub struct SideMetadataSpec { } impl SideMetadataSpec { - /// Is offset for this spec Address? (contiguous side metadata for 64 bits, and global specs in 32 bits) - pub const fn is_absolute_offset(&self) -> bool { + /// Is this spec using contiguous side metadata? If not, it uses chunked side metadata. + pub const fn uses_contiguous_side_metadata(&self) -> bool { self.is_global || cfg!(target_pointer_width = "64") } + /// Is offset for this spec Address? + pub const fn is_absolute_offset(&self) -> bool { + self.uses_contiguous_side_metadata() + } + /// If offset for this spec relative? (chunked side metadata for local specs in 32 bits) pub const fn is_rel_offset(&self) -> bool { !self.is_absolute_offset() @@ -149,63 +154,130 @@ impl SideMetadataSpec { MMAPPER.is_mapped_address(meta_addr) } - /// This method is used for bulk updating side metadata for a data address range. As we cannot guarantee + /// This method is used for iterating side metadata for a data address range. 
As we cannot guarantee /// that the data address range can be mapped to whole metadata bytes, we have to deal with cases that /// we need to mask and zero certain bits in a metadata byte. The end address and the end bit are exclusive. /// The end bit for update_bits could be 8, so overflowing needs to be taken care of. - pub(super) fn update_meta_bits( + /// + /// Returns true if we iterate through every bits in the range. Return false if we abort iteration early. + /// + /// Arguments: + /// * `forwards`: If true, we iterate forwards (from start/low address to end/high address). Otherwise, + /// we iterate backwards (from end/high address to start/low address). + /// * `visit_bytes`/`visit_bits`: The closures returns whether the itertion is early terminated. + pub(super) fn iterate_meta_bits( meta_start_addr: Address, meta_start_bit: u8, meta_end_addr: Address, meta_end_bit: u8, - update_bytes: &impl Fn(Address, Address), - update_bits: &impl Fn(Address, u8, u8), - ) { + forwards: bool, + visit_bytes: &impl Fn(Address, Address) -> bool, + visit_bits: &impl Fn(Address, u8, u8) -> bool, + ) -> bool { + trace!( + "iterate_meta_bits: {} {}, {} {}", + meta_start_addr, + meta_start_bit, + meta_end_addr, + meta_end_bit + ); // Start/end is the same, we don't need to do anything. if meta_start_addr == meta_end_addr && meta_start_bit == meta_end_bit { - return; + return false; } // zeroing bytes if meta_start_bit == 0 && meta_end_bit == 0 { - update_bytes(meta_start_addr, meta_end_addr); - return; + return visit_bytes(meta_start_addr, meta_end_addr); } if meta_start_addr == meta_end_addr { // Update bits in the same byte between start and end bit - update_bits(meta_start_addr, meta_start_bit, meta_end_bit); + visit_bits(meta_start_addr, meta_start_bit, meta_end_bit) } else if meta_start_addr + 1usize == meta_end_addr && meta_end_bit == 0 { // Update bits in the same byte after the start bit (between start bit and 8) - update_bits(meta_start_addr, meta_start_bit, 8); + visit_bits(meta_start_addr, meta_start_bit, 8) } else { - // update bits in the first byte - Self::update_meta_bits( - meta_start_addr, - meta_start_bit, - meta_start_addr + 1usize, - 0, - update_bytes, - update_bits, - ); - // update bytes in the middle - Self::update_meta_bits( - meta_start_addr + 1usize, - 0, - meta_end_addr, - 0, - update_bytes, - update_bits, - ); - // update bits in the last byte - Self::update_meta_bits( - meta_end_addr, - 0, - meta_end_addr, - meta_end_bit, - update_bytes, - update_bits, - ); + // Update each segments. + // Clippy wants to move this if block up as a else-if block. But I think this is logically more clear. So disable the clippy warning. 
+ #[allow(clippy::collapsible_else_if)] + if forwards { + // update bits in the first byte + if Self::iterate_meta_bits( + meta_start_addr, + meta_start_bit, + meta_start_addr + 1usize, + 0, + forwards, + visit_bytes, + visit_bits, + ) { + return true; + } + // update bytes in the middle + if Self::iterate_meta_bits( + meta_start_addr + 1usize, + 0, + meta_end_addr, + 0, + forwards, + visit_bytes, + visit_bits, + ) { + return true; + } + // update bits in the last byte + if Self::iterate_meta_bits( + meta_end_addr, + 0, + meta_end_addr, + meta_end_bit, + forwards, + visit_bytes, + visit_bits, + ) { + return true; + } + false + } else { + // update bits in the last byte + if Self::iterate_meta_bits( + meta_end_addr, + 0, + meta_end_addr, + meta_end_bit, + forwards, + visit_bytes, + visit_bits, + ) { + return true; + } + // update bytes in the middle + if Self::iterate_meta_bits( + meta_start_addr + 1usize, + 0, + meta_end_addr, + 0, + forwards, + visit_bytes, + visit_bits, + ) { + return true; + } + // update bits in the first byte + if Self::iterate_meta_bits( + meta_start_addr, + meta_start_bit, + meta_start_addr + 1usize, + 0, + forwards, + visit_bytes, + visit_bits, + ) { + return true; + } + false + } } } @@ -216,20 +288,23 @@ impl SideMetadataSpec { meta_end_addr: Address, meta_end_bit: u8, ) { - let zero_bytes = |start: Address, end: Address| { + let zero_bytes = |start: Address, end: Address| -> bool { memory::zero(start, end - start); + false }; - let zero_bits = |addr: Address, start_bit: u8, end_bit: u8| { + let zero_bits = |addr: Address, start_bit: u8, end_bit: u8| -> bool { // we are zeroing selected bits in one byte let mask: u8 = u8::MAX.checked_shl(end_bit.into()).unwrap_or(0) | !(u8::MAX << start_bit); // Get a mask that the bits we need to zero are set to zero, and the other bits are 1. unsafe { addr.as_ref::() }.fetch_and(mask, Ordering::SeqCst); + false }; - Self::update_meta_bits( + Self::iterate_meta_bits( meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit, + true, &zero_bytes, &zero_bits, ); @@ -242,20 +317,23 @@ impl SideMetadataSpec { meta_end_addr: Address, meta_end_bit: u8, ) { - let set_bytes = |start: Address, end: Address| { + let set_bytes = |start: Address, end: Address| -> bool { memory::set(start, 0xff, end - start); + false }; - let set_bits = |addr: Address, start_bit: u8, end_bit: u8| { + let set_bits = |addr: Address, start_bit: u8, end_bit: u8| -> bool { // we are setting selected bits in one byte let mask: u8 = !(u8::MAX.checked_shl(end_bit.into()).unwrap_or(0)) & (u8::MAX << start_bit); // Get a mask that the bits we need to set are 1, and the other bits are 0. 
unsafe { addr.as_ref::() }.fetch_or(mask, Ordering::SeqCst); + false }; - Self::update_meta_bits( + Self::iterate_meta_bits( meta_start_addr, meta_start_bit, meta_end_addr, meta_end_bit, + true, &set_bytes, &set_bits, ); @@ -420,14 +498,17 @@ impl SideMetadataSpec { debug_assert_eq!(dst_meta_start_bit, src_meta_start_bit); - let copy_bytes = |dst_start: Address, dst_end: Address| unsafe { - let byte_offset = dst_start - dst_meta_start_addr; - let src_start = src_meta_start_addr + byte_offset; - let size = dst_end - dst_start; - std::ptr::copy::(src_start.to_ptr(), dst_start.to_mut_ptr(), size); + let copy_bytes = |dst_start: Address, dst_end: Address| -> bool { + unsafe { + let byte_offset = dst_start - dst_meta_start_addr; + let src_start = src_meta_start_addr + byte_offset; + let size = dst_end - dst_start; + std::ptr::copy::(src_start.to_ptr(), dst_start.to_mut_ptr(), size); + false + } }; - let copy_bits = |dst: Address, start_bit: u8, end_bit: u8| { + let copy_bits = |dst: Address, start_bit: u8, end_bit: u8| -> bool { let byte_offset = dst - dst_meta_start_addr; let src = src_meta_start_addr + byte_offset; // we are setting selected bits in one byte @@ -437,13 +518,15 @@ impl SideMetadataSpec { let old_dst = unsafe { dst.as_ref::() }.load(Ordering::Relaxed); let new = (old_src & mask) | (old_dst & !mask); unsafe { dst.as_ref::() }.store(new, Ordering::Relaxed); + false }; - Self::update_meta_bits( + Self::iterate_meta_bits( dst_meta_start_addr, dst_meta_start_bit, dst_meta_end_addr, dst_meta_end_bit, + true, ©_bytes, ©_bits, ); @@ -456,7 +539,13 @@ impl SideMetadataSpec { /// * check if the side metadata memory is mapped. /// * check if the side metadata content is correct based on a sanity map (only for extreme assertions). #[allow(unused_variables)] // data_addr/input is not used in release build - fn side_metadata_access R, V: FnOnce(R)>( + fn side_metadata_access< + const CHECK_VALUE: bool, + T: MetadataValue, + R: Copy, + F: FnOnce() -> R, + V: FnOnce(R), + >( &self, data_addr: Address, input: Option, @@ -474,7 +563,9 @@ impl SideMetadataSpec { // A few checks #[cfg(debug_assertions)] { - self.assert_value_type::(input); + if CHECK_VALUE { + self.assert_value_type::(input); + } #[cfg(feature = "extreme_assertions")] self.assert_metadata_mapped(data_addr); } @@ -483,7 +574,9 @@ impl SideMetadataSpec { let ret = access_func(); // Verifying the side metadata: checks the result with the sanity table, or store some results to the sanity table - verify_func(ret); + if CHECK_VALUE { + verify_func(ret); + } ret } @@ -497,7 +590,7 @@ impl SideMetadataSpec { /// 1. Concurrent access to this operation is undefined behaviour. /// 2. Interleaving Non-atomic and atomic operations is undefined behaviour. pub unsafe fn load(&self, data_addr: Address) -> T { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, None, || { @@ -529,7 +622,7 @@ impl SideMetadataSpec { /// 1. Concurrent access to this operation is undefined behaviour. /// 2. Interleaving Non-atomic and atomic operations is undefined behaviour. pub unsafe fn store(&self, data_addr: Address, metadata: T) { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, Some(metadata), || { @@ -556,7 +649,7 @@ impl SideMetadataSpec { /// Loads a value from the side metadata for the given address. /// This method has similar semantics to `store` in Rust atomics. 
pub fn load_atomic(&self, data_addr: Address, order: Ordering) -> T { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, None, || { @@ -581,7 +674,7 @@ impl SideMetadataSpec { /// Store the given value to the side metadata for the given address. /// This method has similar semantics to `store` in Rust atomics. pub fn store_atomic(&self, data_addr: Address, metadata: T, order: Ordering) { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, Some(metadata), || { @@ -659,7 +752,7 @@ impl SideMetadataSpec { // For extreme assertions, we only set 1 to the given address. self.store_atomic::(data_addr, 1, order) } else { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, Some(1u8), || { @@ -672,6 +765,48 @@ impl SideMetadataSpec { } } + /// Load the raw byte in the side metadata byte that is mapped to the data address. + /// + /// # Safety + /// This is unsafe because: + /// + /// 1. Concurrent access to this operation is undefined behaviour. + /// 2. Interleaving Non-atomic and atomic operations is undefined behaviour. + pub unsafe fn load_raw_byte(&self, data_addr: Address) -> u8 { + debug_assert!(self.log_num_of_bits < 3); + self.side_metadata_access::( + data_addr, + None, + || { + let meta_addr = address_to_meta_address(self, data_addr); + meta_addr.load::() + }, + |_| {}, + ) + } + + /// Load the raw word that includes the side metadata byte mapped to the data address. + /// + /// # Safety + /// This is unsafe because: + /// + /// 1. Concurrent access to this operation is undefined behaviour. + /// 2. Interleaving Non-atomic and atomic operations is undefined behaviour. + pub unsafe fn load_raw_word(&self, data_addr: Address) -> usize { + use crate::util::constants::*; + debug_assert!(self.log_num_of_bits < (LOG_BITS_IN_BYTE + LOG_BYTES_IN_ADDRESS) as usize); + self.side_metadata_access::( + data_addr, + None, + || { + let meta_addr = address_to_meta_address(self, data_addr); + let aligned_meta_addr = meta_addr.align_down(BYTES_IN_ADDRESS); + aligned_meta_addr.load::() + }, + |_| {}, + ) + } + /// Stores the new value into the side metadata for the gien address if the current value is the same as the old value. /// This method has similar semantics to `compare_exchange` in Rust atomics. /// The return value is a result indicating whether the new value was written and containing the previous value. 
@@ -684,7 +819,7 @@ impl SideMetadataSpec { success_order: Ordering, failure_order: Ordering, ) -> std::result::Result { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, Some(new_metadata), || { @@ -770,7 +905,7 @@ impl SideMetadataSpec { val: T, order: Ordering, ) -> T { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, Some(val), || { @@ -805,7 +940,7 @@ impl SideMetadataSpec { val: T, order: Ordering, ) -> T { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, Some(val), || { @@ -839,7 +974,7 @@ impl SideMetadataSpec { val: T, order: Ordering, ) -> T { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, Some(val), || { @@ -873,7 +1008,7 @@ impl SideMetadataSpec { val: T, order: Ordering, ) -> T { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, Some(val), || { @@ -908,7 +1043,7 @@ impl SideMetadataSpec { fetch_order: Ordering, mut f: F, ) -> std::result::Result { - self.side_metadata_access::( + self.side_metadata_access::( data_addr, None, move || -> std::result::Result { @@ -940,12 +1075,142 @@ impl SideMetadataSpec { |_result| { #[cfg(feature = "extreme_assertions")] if let Ok(old_val) = _result { - println!("Ok({})", old_val); sanity::verify_update::(self, data_addr, old_val, f(old_val).unwrap()) } }, ) } + + /// Search for a data address that has a non zero value in the side metadata. The search starts from the given data address (including this address), + /// and iterates backwards for the given bytes (non inclusive) before the data address. + /// + /// The data_addr and the corresponding side metadata address may not be mapped. Thus when this function checks the given data address, and + /// when it searches back, it needs to check if the address is mapped or not to avoid loading from an unmapped address. + /// + /// This function returns an address that is aligned to the region of this side metadata (`log_bytes_per_region`), and the side metadata + /// for the address is non zero. + /// + /// # Safety + /// + /// This function uses non-atomic load for the side metadata. The user needs to make sure + /// that there is no other thread that is mutating the side metadata. + pub unsafe fn find_prev_non_zero_value( + &self, + data_addr: Address, + search_limit_bytes: usize, + ) -> Option
{ + debug_assert!(search_limit_bytes > 0); + + if self.uses_contiguous_side_metadata() { + // Contiguous side metadata + self.find_prev_non_zero_value_fast::<T>(data_addr, search_limit_bytes) + } else { + // TODO: We should be able to optimize further for this case. However, we need to be careful that the side metadata + // is not contiguous, and we need to skip to the next chunk's side metadata when we search into a different chunk. + // This won't be used for the VO bit, as the VO bit is global and is always contiguous, so for now we have not bothered to do it. + warn!("We are trying to search for non-zero bits in discontiguous side metadata. The performance is slow, as MMTk does not optimize for this case."); + self.find_prev_non_zero_value_simple::<T>(data_addr, search_limit_bytes) + } + } + + fn find_prev_non_zero_value_simple<T: MetadataValue>( + &self, + data_addr: Address, + search_limit_bytes: usize, + ) -> Option<Address>
{ + let region_bytes = 1 << self.log_bytes_in_region; + // Figure out the range that we need to search. + let start_addr = data_addr.align_down(region_bytes); + let end_addr = data_addr + .saturating_sub(search_limit_bytes) + .align_down(region_bytes); + + let mut cursor = start_addr; + while cursor > end_addr { + // We encounter an unmapped address. Just return None. + if !cursor.is_mapped() { + return None; + } + // If we find non-zero value, just return it. + if !unsafe { self.load::(cursor).is_zero() } { + return Some(cursor); + } + cursor -= region_bytes; + } + None + } + + #[allow(clippy::let_and_return)] + fn find_prev_non_zero_value_fast( + &self, + data_addr: Address, + search_limit_bytes: usize, + ) -> Option
{ + debug_assert!(self.uses_contiguous_side_metadata()); + + // Quick check if the data address is mapped at all. + if !data_addr.is_mapped() { + return None; + } + // Quick check if the current data_addr has a non zero value. + if !unsafe { self.load::(data_addr).is_zero() } { + return Some(data_addr.align_down(1 << self.log_bytes_in_region)); + } + + // Figure out the start and end data address. + let start_addr = data_addr.saturating_sub(search_limit_bytes) + 1usize; + let end_addr = data_addr; + + // Then figure out the start and end metadata address and bits. + let start_meta_addr = address_to_contiguous_meta_address(self, start_addr); + let start_meta_shift = meta_byte_lshift(self, start_addr); + let end_meta_addr = address_to_contiguous_meta_address(self, end_addr); + let end_meta_shift = meta_byte_lshift(self, end_addr); + + // The result will be set by one of the following closures. + // Use Cell so it doesn't need to be mutably borrowed by the two closures which Rust will complain. + let res = std::cell::Cell::new(None); + + let check_bytes_backwards = |start: Address, end: Address| -> bool { + match helpers::find_last_non_zero_bit_in_metadata_bytes(start, end) { + helpers::FindMetaBitResult::Found { addr, bit } => { + res.set(Some(contiguous_meta_address_to_address(self, addr, bit))); + // Return true to abort the search. We found the bit. + true + } + // If we see unmapped metadata, we don't need to search any more. + helpers::FindMetaBitResult::UnmappedMetadata => true, + // Return false to continue searching. + helpers::FindMetaBitResult::NotFound => false, + } + }; + let check_bits_backwards = |addr: Address, start_bit: u8, end_bit: u8| -> bool { + match helpers::find_last_non_zero_bit_in_metadata_bits(addr, start_bit, end_bit) { + helpers::FindMetaBitResult::Found { addr, bit } => { + res.set(Some(contiguous_meta_address_to_address(self, addr, bit))); + // Return true to abort the search. We found the bit. + true + } + // If we see unmapped metadata, we don't need to search any more. + helpers::FindMetaBitResult::UnmappedMetadata => true, + // Return false to continue searching. + helpers::FindMetaBitResult::NotFound => false, + } + }; + + Self::iterate_meta_bits( + start_meta_addr, + start_meta_shift, + end_meta_addr, + end_meta_shift, + false, + &check_bytes_backwards, + &check_bits_backwards, + ); + + res.get() + .map(|addr| addr.align_down(1 << self.log_bytes_in_region)) + } } impl fmt::Debug for SideMetadataSpec { @@ -1325,6 +1590,8 @@ mod tests { use crate::util::test_util::{serial_test, with_cleanup}; use paste::paste; + const TEST_LOG_BYTES_IN_REGION: usize = 12; + fn test_side_metadata( log_bits: usize, f: impl Fn(&SideMetadataSpec, Address, Address) + std::panic::RefUnwindSafe, @@ -1335,7 +1602,7 @@ mod tests { is_global: true, offset: SideMetadataOffset::addr(GLOBAL_SIDE_METADATA_BASE_ADDRESS), log_num_of_bits: log_bits, - log_bytes_in_region: 12, // page size + log_bytes_in_region: TEST_LOG_BYTES_IN_REGION, // page size }; let context = SideMetadataContext { global: vec![spec], @@ -1345,6 +1612,8 @@ mod tests { sanity.verify_metadata_context("TestPolicy", &context); let data_addr = vm_layout::vm_layout().heap_start; + // Make sure the address is mapped. 
+ crate::MMAPPER.ensure_mapped(data_addr, 1).unwrap(); let meta_addr = address_to_meta_address(&spec, data_addr); with_cleanup( || { @@ -1694,6 +1963,79 @@ mod tests { assert_eq!(unsafe { *meta_ptr }, <$type>::MAX); }); } + + #[test] + fn [<$tname _find_prev_non_zero_value_easy>]() { + test_side_metadata($log_bits, |spec, data_addr, _meta_addr| { + let max_value: $type = max_value($log_bits) as _; + // Store non zero value at data_addr + spec.store_atomic::<$type>(data_addr, max_value, Ordering::SeqCst); + + // Find the value starting from data_addr, at max 8 bytes. + // We should find data_addr + let res_addr = unsafe { spec.find_prev_non_zero_value::<$type>(data_addr, 8) }; + assert!(res_addr.is_some()); + assert_eq!(res_addr.unwrap(), data_addr); + }); + } + + #[test] + fn [<$tname _find_prev_non_zero_value_arbitrary_bytes>]() { + test_side_metadata($log_bits, |spec, data_addr, _meta_addr| { + let max_value: $type = max_value($log_bits) as _; + // Store non zero value at data_addr + spec.store_atomic::<$type>(data_addr, max_value, Ordering::SeqCst); + + // Start from data_addr, we offset arbitrary length, and search back to find data_addr + let test_region = (1 << TEST_LOG_BYTES_IN_REGION); + for len in 1..(test_region*4) { + let start_addr = data_addr + len; + // Use len+1, as len is non inclusive. + let res_addr = unsafe { spec.find_prev_non_zero_value::<$type>(start_addr, len + 1) }; + assert!(res_addr.is_some()); + assert_eq!(res_addr.unwrap(), data_addr); + } + }); + } + + #[test] + fn [<$tname _find_prev_non_zero_value_arbitrary_start>]() { + test_side_metadata($log_bits, |spec, data_addr, _meta_addr| { + let max_value: $type = max_value($log_bits) as _; + + // data_addr has a non-aligned offset + for offset in 0..7usize { + // Apply offset and test with the new data addr + let test_data_addr = data_addr + offset; + spec.store_atomic::<$type>(test_data_addr, max_value, Ordering::SeqCst); + + // The return result should be aligned + let res_addr = unsafe { spec.find_prev_non_zero_value::<$type>(test_data_addr, 4096) }; + assert!(res_addr.is_some()); + assert_eq!(res_addr.unwrap(), data_addr); + + // Clear whatever is set + spec.store_atomic::<$type>(test_data_addr, 0, Ordering::SeqCst); + } + }); + } + + #[test] + fn [<$tname _find_prev_non_zero_value_no_find>]() { + test_side_metadata($log_bits, |spec, data_addr, _meta_addr| { + // Store zero value at data_addr -- so we won't find anything + spec.store_atomic::<$type>(data_addr, 0, Ordering::SeqCst); + + // Start from data_addr, we offset arbitrary length, and search back + let test_region = (1 << TEST_LOG_BYTES_IN_REGION); + for len in 1..(test_region*4) { + let start_addr = data_addr + len; + // Use len+1, as len is non inclusive. 
+ let res_addr = unsafe { spec.find_prev_non_zero_value::<$type>(start_addr, len + 1) }; + assert!(res_addr.is_none()); + } + }); + } } } } diff --git a/src/util/metadata/side_metadata/helpers.rs b/src/util/metadata/side_metadata/helpers.rs index 3fc9762a95..b9bf197dfb 100644 --- a/src/util/metadata/side_metadata/helpers.rs +++ b/src/util/metadata/side_metadata/helpers.rs @@ -16,15 +16,46 @@ pub(super) fn address_to_contiguous_meta_address( let log_bits_num = metadata_spec.log_num_of_bits as i32; let log_bytes_in_region = metadata_spec.log_bytes_in_region; - let rshift = (LOG_BITS_IN_BYTE as i32) - log_bits_num; + let shift = (LOG_BITS_IN_BYTE as i32) - log_bits_num; - if rshift >= 0 { - metadata_spec.get_absolute_offset() + ((data_addr >> log_bytes_in_region) >> rshift) + if shift >= 0 { + metadata_spec.get_absolute_offset() + ((data_addr >> log_bytes_in_region) >> shift) } else { - metadata_spec.get_absolute_offset() + ((data_addr >> log_bytes_in_region) << (-rshift)) + metadata_spec.get_absolute_offset() + ((data_addr >> log_bytes_in_region) << (-shift)) } } +/// Performs reverse address translation from contiguous metadata bits to data addresses. +/// +/// Arguments: +/// * `metadata_spec`: The side metadata spec. It should be contiguous side metadata. +/// * `metadata_addr`; The metadata address. Returned by [`address_to_contiguous_meta_address`]. +/// * `bit`: The bit shift for the metadata. Returned by [`meta_byte_lshift`]. +pub(super) fn contiguous_meta_address_to_address( + metadata_spec: &SideMetadataSpec, + metadata_addr: Address, + bit: u8, +) -> Address { + let shift = (LOG_BITS_IN_BYTE as i32) - metadata_spec.log_num_of_bits as i32; + let relative_meta_addr = metadata_addr - metadata_spec.get_absolute_offset(); + + let data_addr_intermediate = if shift >= 0 { + relative_meta_addr << shift + } else { + relative_meta_addr >> (-shift) + }; + let data_addr_bit_shift = if shift >= 0 { + metadata_spec.log_bytes_in_region - metadata_spec.log_num_of_bits + } else { + metadata_spec.log_bytes_in_region + }; + + let data_addr = (data_addr_intermediate << metadata_spec.log_bytes_in_region) + + ((bit as usize) << data_addr_bit_shift); + + unsafe { Address::from_usize(data_addr) } +} + /// Unmaps the specified metadata range, or panics. #[cfg(test)] pub(crate) fn ensure_munmap_metadata(start: Address, size: usize) { @@ -134,3 +165,218 @@ pub(super) fn meta_byte_mask(metadata_spec: &SideMetadataSpec) -> u8 { let bits_num_log = metadata_spec.log_num_of_bits; ((1usize << (1usize << bits_num_log)) - 1) as u8 } + +/// The result type for find meta bits functions. +pub enum FindMetaBitResult { + Found { addr: Address, bit: u8 }, + NotFound, + UnmappedMetadata, +} + +// Check and find the last bit that is set. We try load words where possible, and fall back to load bytes. +pub fn find_last_non_zero_bit_in_metadata_bytes( + meta_start: Address, + meta_end: Address, +) -> FindMetaBitResult { + use crate::util::constants::BYTES_IN_ADDRESS; + use crate::util::heap::vm_layout::MMAP_CHUNK_BYTES; + + let mut cur = meta_end; + // We need to check if metadata address is mapped or not. But we only check at chunk granularity. + // This records the start of a chunk that is tested to be mapped. + let mut mapped_chunk = Address::MAX; + while cur > meta_start { + // If we can check the whole word, set step to word size. Otherwise, the step is 1 (byte) and we check byte. 
+ let step = if cur.is_aligned_to(BYTES_IN_ADDRESS) + && cur.align_down(BYTES_IN_ADDRESS) >= meta_start + { + BYTES_IN_ADDRESS + } else { + 1 + }; + // Move to the address so we can load from it + cur -= step; + + // If we are looking at an address that is not in a mapped chunk, we need to check if the chunk if mapped. + if cur < mapped_chunk { + if cur.is_mapped() { + // This is mapped. No need to check for this chunk. + mapped_chunk = cur.align_down(MMAP_CHUNK_BYTES); + } else { + return FindMetaBitResult::UnmappedMetadata; + } + } + + if step == BYTES_IN_ADDRESS { + // Load and check a usize word + let value = unsafe { cur.load::() }; + if value != 0 { + // Find the exact non-zero byte within the usize using bitwise operations + let byte_offset = (value.trailing_zeros() / 8) as usize; + let byte_addr = cur + byte_offset; + let byte_value: u8 = ((value >> (byte_offset * 8)) & 0xFF) as u8; + let bit = find_last_non_zero_bit_in_u8(byte_value).unwrap(); + return FindMetaBitResult::Found { + addr: byte_addr, + bit, + }; + } + } else { + // Load and check a byte + let value = unsafe { cur.load::() }; + if let Some(bit) = find_last_non_zero_bit_in_u8(value) { + return FindMetaBitResult::Found { addr: cur, bit }; + } + } + } + FindMetaBitResult::NotFound +} + +// Check and find the last non-zero bit in the same byte. +pub fn find_last_non_zero_bit_in_metadata_bits( + addr: Address, + start_bit: u8, + end_bit: u8, +) -> FindMetaBitResult { + if !addr.is_mapped() { + return FindMetaBitResult::UnmappedMetadata; + } + let byte = unsafe { addr.load::() }; + if let Some(bit) = find_last_non_zero_bit_in_u8(byte) { + if bit >= start_bit && bit < end_bit { + return FindMetaBitResult::Found { addr, bit }; + } + } + FindMetaBitResult::NotFound +} + +fn find_last_non_zero_bit_in_u8(byte_value: u8) -> Option { + if byte_value != 0 { + let bit = byte_value.trailing_zeros(); + debug_assert!(bit < 8); + Some(bit as u8) + } else { + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::util::metadata::side_metadata::*; + + fn test_round_trip_conversion(spec: &SideMetadataSpec, test_data: &[Address]) { + for ref_addr in test_data { + let addr = *ref_addr; + + // This is an aligned address. When we do roundtrip conversion, we will get back the original address. + { + assert!(addr.is_aligned_to(1 << spec.log_bytes_in_region)); + let meta_addr = address_to_contiguous_meta_address(spec, addr); + let shift = meta_byte_lshift(spec, addr); + assert_eq!( + contiguous_meta_address_to_address(spec, meta_addr, shift), + addr + ); + } + + // This is an unaligned address. When we do roundtrip conversion, we will get the aligned address. 
+ { + let next_addr = addr + 1usize; + let meta_addr = address_to_contiguous_meta_address(spec, next_addr); + let shift = meta_byte_lshift(spec, next_addr); + assert_eq!( + contiguous_meta_address_to_address(spec, meta_addr, shift), + addr + ); // we get back addr (which is the aligned address) + } + } + } + + const TEST_ADDRESS_8B_REGION: [Address; 8] = [ + unsafe { Address::from_usize(0x8000_0000) }, + unsafe { Address::from_usize(0x8000_0008) }, + unsafe { Address::from_usize(0x8000_0010) }, + unsafe { Address::from_usize(0x8000_0018) }, + unsafe { Address::from_usize(0x8000_0020) }, + unsafe { Address::from_usize(0x8001_0000) }, + unsafe { Address::from_usize(0x8001_0008) }, + unsafe { Address::from_usize(0xd000_0000) }, + ]; + + #[test] + fn test_contiguous_metadata_conversion_0_3() { + let spec = SideMetadataSpec { + name: "ContiguousMetadataTestSpec", + is_global: true, + offset: SideMetadataOffset::addr(GLOBAL_SIDE_METADATA_BASE_ADDRESS), + log_num_of_bits: 0, + log_bytes_in_region: 3, + }; + + test_round_trip_conversion(&spec, &TEST_ADDRESS_8B_REGION); + } + + #[test] + fn test_contiguous_metadata_conversion_1_3() { + let spec = SideMetadataSpec { + name: "ContiguousMetadataTestSpec", + is_global: true, + offset: SideMetadataOffset::addr(GLOBAL_SIDE_METADATA_BASE_ADDRESS), + log_num_of_bits: 1, + log_bytes_in_region: 3, + }; + + test_round_trip_conversion(&spec, &TEST_ADDRESS_8B_REGION); + } + + #[test] + fn test_contiguous_metadata_conversion_4_3() { + let spec = SideMetadataSpec { + name: "ContiguousMetadataTestSpec", + is_global: true, + offset: SideMetadataOffset::addr(GLOBAL_SIDE_METADATA_BASE_ADDRESS), + log_num_of_bits: 4, + log_bytes_in_region: 3, + }; + + test_round_trip_conversion(&spec, &TEST_ADDRESS_8B_REGION); + } + + #[test] + fn test_contiguous_metadata_conversion_5_3() { + let spec = SideMetadataSpec { + name: "ContiguousMetadataTestSpec", + is_global: true, + offset: SideMetadataOffset::addr(GLOBAL_SIDE_METADATA_BASE_ADDRESS), + log_num_of_bits: 5, + log_bytes_in_region: 3, + }; + + test_round_trip_conversion(&spec, &TEST_ADDRESS_8B_REGION); + } + + const TEST_ADDRESS_4KB_REGION: [Address; 8] = [ + unsafe { Address::from_usize(0x8000_0000) }, + unsafe { Address::from_usize(0x8000_1000) }, + unsafe { Address::from_usize(0x8000_2000) }, + unsafe { Address::from_usize(0x8000_3000) }, + unsafe { Address::from_usize(0x8000_4000) }, + unsafe { Address::from_usize(0x8001_0000) }, + unsafe { Address::from_usize(0x8001_1000) }, + unsafe { Address::from_usize(0xd000_0000) }, + ]; + + #[test] + fn test_contiguous_metadata_conversion_0_12() { + let spec = SideMetadataSpec { + name: "ContiguousMetadataTestSpec", + is_global: true, + offset: SideMetadataOffset::addr(GLOBAL_SIDE_METADATA_BASE_ADDRESS), + log_num_of_bits: 0, + log_bytes_in_region: 12, // 4K + }; + + test_round_trip_conversion(&spec, &TEST_ADDRESS_4KB_REGION); + } +} diff --git a/src/util/metadata/side_metadata/sanity.rs b/src/util/metadata/side_metadata/sanity.rs index 46f9889bf8..48d3c55de9 100644 --- a/src/util/metadata/side_metadata/sanity.rs +++ b/src/util/metadata/side_metadata/sanity.rs @@ -651,10 +651,6 @@ pub fn verify_update( // truncate the new_val if metadata's bits is fewer than the type's bits let new_val_wrapped = truncate_value::(metadata_spec.log_num_of_bits, new_val.to_u64().unwrap()); - println!( - "verify_update old = {} new = {} wrapped = {:x}", - old_val, new_val, new_val_wrapped - ); let sanity_map = &mut CONTENT_SANITY_MAP.write().unwrap(); match sanity_map.get_mut(metadata_spec) { diff 
--git a/src/util/metadata/vo_bit/mod.rs b/src/util/metadata/vo_bit/mod.rs index a64c8f7908..e7b0f2551b 100644 --- a/src/util/metadata/vo_bit/mod.rs +++ b/src/util/metadata/vo_bit/mod.rs @@ -100,20 +100,11 @@ pub fn is_vo_bit_set(object: ObjectReference) -> bool { /// Check if an address can be turned directly into an object reference using the VO bit. /// If so, return `Some(object)`. Otherwise return `None`. pub fn is_vo_bit_set_for_addr(address: Address) -> Option { - let potential_object = ObjectReference::from_raw_address(address)?; - - let addr = potential_object.to_address::(); - - // If we haven't mapped VO bit for the address, it cannot be an object - if !VO_BIT_SIDE_METADATA_SPEC.is_mapped(addr) { + // if the address is not aligned, it cannot be an object reference. + if !address.is_aligned_to(ObjectReference::ALIGNMENT) { return None; } - - if VO_BIT_SIDE_METADATA_SPEC.load_atomic::(addr, Ordering::SeqCst) == 1 { - Some(potential_object) - } else { - None - } + is_vo_bit_set_inner::(address) } /// Check if an address can be turned directly into an object reference using the VO bit. @@ -124,17 +115,28 @@ pub fn is_vo_bit_set_for_addr(address: Address) -> Option(address: Address) -> Option { - let potential_object = ObjectReference::from_raw_address(address)?; + is_vo_bit_set_inner::(address) +} - let addr = potential_object.to_address::(); +fn is_vo_bit_set_inner( + address: Address, +) -> Option { + let addr = get_in_object_address_for_potential_object::(address); // If we haven't mapped VO bit for the address, it cannot be an object if !VO_BIT_SIDE_METADATA_SPEC.is_mapped(addr) { return None; } - if VO_BIT_SIDE_METADATA_SPEC.load::(addr) == 1 { - Some(potential_object) + let vo_bit = if ATOMIC { + VO_BIT_SIDE_METADATA_SPEC.load_atomic::(addr, Ordering::SeqCst) + } else { + unsafe { VO_BIT_SIDE_METADATA_SPEC.load::(addr) } + }; + + if vo_bit == 1 { + let obj = get_object_ref_for_vo_addr::(addr); + Some(obj) } else { None } @@ -160,3 +162,78 @@ pub fn bcopy_vo_bit_from_mark_bit(start: Address, size: usize) { let side_mark_bit_spec = mark_bit_spec.extract_side_spec(); VO_BIT_SIDE_METADATA_SPEC.bcopy_metadata_contiguous(start, size, side_mark_bit_spec); } + +use crate::util::constants::{LOG_BITS_IN_BYTE, LOG_BYTES_IN_ADDRESS}; + +/// How many data memory bytes does 1 word in the VO bit side metadata represents? +pub const VO_BIT_WORD_TO_REGION: usize = 1 + << (VO_BIT_SIDE_METADATA_SPEC.log_bytes_in_region + + LOG_BITS_IN_BYTE as usize + + LOG_BYTES_IN_ADDRESS as usize + - VO_BIT_SIDE_METADATA_SPEC.log_num_of_bits); + +/// Bulk check if a VO bit word. Return true if there is any bit set in the word. +pub fn get_raw_vo_bit_word(addr: Address) -> usize { + unsafe { VO_BIT_SIDE_METADATA_SPEC.load_raw_word(addr) } +} + +/// Find the base reference to the object from a potential internal pointer. +pub fn find_object_from_internal_pointer( + start: Address, + search_limit_bytes: usize, +) -> Option { + if !start.is_mapped() { + return None; + } + + if let Some(vo_addr) = unsafe { + VO_BIT_SIDE_METADATA_SPEC.find_prev_non_zero_value::(start, search_limit_bytes) + } { + is_internal_ptr_from_vo_bit::(vo_addr, start) + } else { + None + } +} + +/// Turning a potential object reference into its in-object address (the ref_to_address address) where the metadata is set for. 
+fn get_in_object_address_for_potential_object(potential_obj: Address) -> Address { + potential_obj.offset(VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET) +} + +/// Get the object reference from an aligned address where VO bit is set. +fn get_object_ref_for_vo_addr(vo_addr: Address) -> ObjectReference { + let addr = vo_addr.offset(-VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET); + let aligned = addr.align_up(ObjectReference::ALIGNMENT); + unsafe { ObjectReference::from_raw_address_unchecked(aligned) } +} + +/// Check if the address could be an internal pointer in the object. +fn is_internal_ptr(obj: ObjectReference, internal_ptr: Address) -> bool { + let obj_start = obj.to_object_start::(); + let obj_size = VM::VMObjectModel::get_current_size(obj); + internal_ptr < obj_start + obj_size +} + +/// Check if the address could be an internal pointer based on where VO bit is set. +pub fn is_internal_ptr_from_vo_bit( + vo_addr: Address, + internal_ptr: Address, +) -> Option { + // VO bit should be set on the address. + debug_assert!(unsafe { is_vo_addr(vo_addr) }); + + let obj = get_object_ref_for_vo_addr::(vo_addr); + if is_internal_ptr::(obj, internal_ptr) { + Some(obj) + } else { + None + } +} + +/// Non-atomically check if the VO bit is set for this address. +/// +/// # Safety +/// The caller needs to make sure that no one is modifying VO bit. +pub unsafe fn is_vo_addr(addr: Address) -> bool { + VO_BIT_SIDE_METADATA_SPEC.load::(addr) != 0 +} diff --git a/src/util/mod.rs b/src/util/mod.rs index da38934e63..8036ad7ae1 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -19,10 +19,19 @@ pub mod constants; pub mod conversions; /// The copy allocators for a GC worker. pub mod copy; +/// Heap implementation, including page resource, mmapper, etc. +pub mod heap; +/// Checking if an address is an valid MMTk object. +#[cfg(feature = "is_mmtk_object")] +pub mod is_mmtk_object; /// Linear scan through a heap range pub mod linear_scan; +/// Various malloc implementations (conditionally compiled by features) +pub mod malloc; /// Wrapper functions for memory syscalls such as mmap, mprotect, etc. pub mod memory; +/// Metadata (OnSide or InHeader) implementation. +pub mod metadata; /// Opaque pointers used in MMTk, e.g. VMThread. pub mod opaque_pointer; /// MMTk command line options. @@ -39,17 +48,8 @@ pub(crate) mod analysis; pub(crate) mod erase_vm; /// Finalization implementation. pub(crate) mod finalizable_processor; -/// Heap implementation, including page resource, mmapper, etc. -pub mod heap; -/// Checking if an address is an valid MMTk object. -#[cfg(feature = "is_mmtk_object")] -pub mod is_mmtk_object; /// Logger initialization pub(crate) mod logger; -/// Various malloc implementations (conditionally compiled by features) -pub mod malloc; -/// Metadata (OnSide or InHeader) implementation. -pub mod metadata; /// Forwarding word in object copying. pub(crate) mod object_forwarding; /// Reference processing implementation. diff --git a/src/util/options.rs b/src/util/options.rs index b8f8cebe9d..dd773989bd 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -25,7 +25,7 @@ pub enum NurseryZeroingOptions { } /// Select a GC plan for MMTk. -#[derive(Copy, Clone, EnumString, Debug)] +#[derive(Copy, Clone, EnumString, Debug, PartialEq, Eq)] pub enum PlanSelector { /// Allocation only without a collector. This is usually used for debugging. /// Similar to OpenJDK epsilon (). 
diff --git a/src/vm/tests/mock_tests/mock_test_conservatism.rs b/src/vm/tests/mock_tests/mock_test_conservatism.rs index 9f3d47db04..2b06467625 100644 --- a/src/vm/tests/mock_tests/mock_test_conservatism.rs +++ b/src/vm/tests/mock_tests/mock_test_conservatism.rs @@ -33,17 +33,26 @@ fn assert_filter_fail(addr: Address) { } fn assert_valid_objref(addr: Address) { + let obj = memory_manager::is_mmtk_object(addr); assert!( - memory_manager::is_mmtk_object(addr), - "mmtk_is_mmtk_object({}) should return true. Got false.", + obj.is_some(), + "mmtk_is_mmtk_object({}) should return Some. Got None.", addr, ); + assert_eq!( + obj.unwrap().to_raw_address(), + addr, + "mmtk_is_mmtk_object({}) should return Some({}). Got {:?}", + addr, + addr, + obj + ); } fn assert_invalid_objref(addr: Address, real: Address) { assert!( - !memory_manager::is_mmtk_object(addr), - "mmtk_is_mmtk_object({}) should return false. Got true. Real object: {}", + memory_manager::is_mmtk_object(addr).is_none(), + "mmtk_is_mmtk_object({}) should return None. Got Some. Real object: {}", addr, real, ); diff --git a/src/vm/tests/mock_tests/mock_test_internal_ptr_before_object_ref.rs b/src/vm/tests/mock_tests/mock_test_internal_ptr_before_object_ref.rs new file mode 100644 index 0000000000..007d3ab99d --- /dev/null +++ b/src/vm/tests/mock_tests/mock_test_internal_ptr_before_object_ref.rs @@ -0,0 +1,63 @@ +// GITHUB-CI: MMTK_PLAN=all +// GITHUB-CI: FEATURES=is_mmtk_object + +use super::mock_test_prelude::*; + +use crate::AllocationSemantics; + +#[test] +pub fn interior_pointer_before_object_ref() { + const MB: usize = 1024 * 1024; + const OBJECT_SIZE: usize = 16; + with_mockvm( + || -> MockVM { + MockVM { + get_object_size: MockMethod::new_fixed(Box::new(|_| OBJECT_SIZE)), + ..MockVM::default() + } + }, + || { + let mut fixture = MutatorFixture::create_with_heapsize(10 * MB); + + let addr = memory_manager::alloc( + &mut fixture.mutator, + OBJECT_SIZE, + 8, + 0, + AllocationSemantics::Default, + ); + assert!(!addr.is_zero()); + + let obj = MockVM::object_start_to_ref(addr); + println!( + "start = {}, end = {}, obj = {}, in-obj addr = {}", + addr, + addr + OBJECT_SIZE, + obj, + obj.to_address::() + ); + memory_manager::post_alloc( + &mut fixture.mutator, + obj, + OBJECT_SIZE, + AllocationSemantics::Default, + ); + + // Forge a pointer that points before the object reference, but after in-object address. MMTk should still find the base reference properly. + + let before_obj_ref = addr; + assert!(before_obj_ref < obj.to_raw_address()); + assert!(before_obj_ref >= obj.to_address::()); + + println!("Check {:?}", before_obj_ref); + let base_ref = crate::memory_manager::find_object_from_internal_pointer::( + before_obj_ref, + usize::MAX, + ); + println!("base_ref {:?}", base_ref); + assert!(base_ref.is_some()); + assert_eq!(base_ref.unwrap(), obj); + }, + no_cleanup, + ) +} diff --git a/src/vm/tests/mock_tests/mock_test_internal_ptr_invalid.rs b/src/vm/tests/mock_tests/mock_test_internal_ptr_invalid.rs new file mode 100644 index 0000000000..14de366798 --- /dev/null +++ b/src/vm/tests/mock_tests/mock_test_internal_ptr_invalid.rs @@ -0,0 +1,39 @@ +// GITHUB-CI: MMTK_PLAN=all +// GITHUB-CI: FEATURES=is_mmtk_object + +use super::mock_test_prelude::*; + +use crate::util::*; + +#[test] +pub fn interior_pointer_invalid() { + const MB: usize = 1024 * 1024; + with_mockvm( + default_setup, + || { + // Set up MMTk even if we don't use it. 
+ let _ = MutatorFixture::create_with_heapsize(10 * MB); + + let assert_no_object = |addr: Address| { + let base_ref = crate::memory_manager::find_object_from_internal_pointer::( + addr, + usize::MAX, + ); + assert!(base_ref.is_none()); + }; + + let heap_start = crate::util::heap::layout::vm_layout::vm_layout().heap_start; + for offset in 0..16usize { + let addr = heap_start + offset; + assert_no_object(addr); + } + + let heap_end = crate::util::heap::layout::vm_layout::vm_layout().heap_end; + for offset in 0..16usize { + let addr = heap_end - offset; + assert_no_object(addr); + } + }, + no_cleanup, + ) +} diff --git a/src/vm/tests/mock_tests/mock_test_internal_ptr_large_object_multi_page.rs b/src/vm/tests/mock_tests/mock_test_internal_ptr_large_object_multi_page.rs new file mode 100644 index 0000000000..1855fcd54d --- /dev/null +++ b/src/vm/tests/mock_tests/mock_test_internal_ptr_large_object_multi_page.rs @@ -0,0 +1,82 @@ +// GITHUB-CI: MMTK_PLAN=all +// GITHUB-CI: FEATURES=is_mmtk_object + +use super::mock_test_prelude::*; + +use crate::util::*; +use crate::AllocationSemantics; + +#[test] +pub fn interior_pointer_in_large_object() { + const MB: usize = 1024 * 1024; + const OBJECT_SIZE: usize = MB; + with_mockvm( + || -> MockVM { + MockVM { + get_object_size: MockMethod::new_fixed(Box::new(|_| OBJECT_SIZE)), + ..MockVM::default() + } + }, + || { + let mut fixture = MutatorFixture::create_with_heapsize(10 * MB); + + let addr = memory_manager::alloc( + &mut fixture.mutator, + OBJECT_SIZE, + 8, + 0, + AllocationSemantics::Los, + ); + assert!(!addr.is_zero()); + + let obj = MockVM::object_start_to_ref(addr); + println!( + "start = {}, end = {}, obj = {}, in-obj addr = {}", + addr, + addr + OBJECT_SIZE, + obj, + obj.to_address::() + ); + + memory_manager::post_alloc( + &mut fixture.mutator, + obj, + OBJECT_SIZE, + AllocationSemantics::Los, + ); + + let test_internal_ptr = + |ptr: Address| { + println!("ptr = {}", ptr); + if ptr > addr + OBJECT_SIZE { + // not internal pointer + let base_ref = crate::memory_manager::find_object_from_internal_pointer::< + MockVM, + >(ptr, usize::MAX); + println!("{:?}", base_ref); + assert!(base_ref.is_none()); + } else { + // is internal pointer + let base_ref = crate::memory_manager::find_object_from_internal_pointer::< + MockVM, + >(ptr, usize::MAX); + assert!(base_ref.is_some()); + assert_eq!(base_ref.unwrap(), obj); + } + }; + + // Test with the first 1024 bytes as offset in the object + for offset in 0..1024usize { + let ptr = obj.to_raw_address() + offset; + test_internal_ptr(ptr); + } + + // Test with the first 1024 bytes after the object size + for offset in OBJECT_SIZE..(OBJECT_SIZE + 1024) { + let ptr = obj.to_raw_address() + offset; + test_internal_ptr(ptr); + } + }, + no_cleanup, + ) +} diff --git a/src/vm/tests/mock_tests/mock_test_internal_ptr_large_object_same_page.rs b/src/vm/tests/mock_tests/mock_test_internal_ptr_large_object_same_page.rs new file mode 100644 index 0000000000..ab36b18249 --- /dev/null +++ b/src/vm/tests/mock_tests/mock_test_internal_ptr_large_object_same_page.rs @@ -0,0 +1,75 @@ +// GITHUB-CI: MMTK_PLAN=all +// GITHUB-CI: FEATURES=is_mmtk_object + +use super::mock_test_prelude::*; + +use crate::AllocationSemantics; + +#[test] +pub fn interior_pointer_in_large_object_same_page() { + const MB: usize = 1024 * 1024; + // Usually we will not see allocation in large object space that is smaller than a page. + // But let's allow it for the page protect plan. 
+ const OBJECT_SIZE: usize = 256; + with_mockvm( + || -> MockVM { + MockVM { + get_object_size: MockMethod::new_fixed(Box::new(|_| OBJECT_SIZE)), + ..MockVM::default() + } + }, + || { + let mut fixture = MutatorFixture::create_with_heapsize(10 * MB); + + let addr = memory_manager::alloc( + &mut fixture.mutator, + OBJECT_SIZE, + 8, + 0, + AllocationSemantics::Los, + ); + assert!(!addr.is_zero()); + + let obj = MockVM::object_start_to_ref(addr); + println!( + "start = {}, end = {}, obj = {}, in-obj addr = {}", + addr, + addr + OBJECT_SIZE, + obj, + obj.to_address::() + ); + + memory_manager::post_alloc( + &mut fixture.mutator, + obj, + OBJECT_SIZE, + AllocationSemantics::Los, + ); + + let ptr = obj.to_raw_address(); + let base_ref = crate::memory_manager::find_object_from_internal_pointer::( + ptr, + OBJECT_SIZE, + ); + println!("{:?}", base_ref); + assert!(base_ref.is_some()); + assert_eq!(base_ref.unwrap(), obj); + + let ptr = obj.to_raw_address() + OBJECT_SIZE / 2; + let base_ref = crate::memory_manager::find_object_from_internal_pointer::( + ptr, + OBJECT_SIZE, + ); + assert!(base_ref.is_some()); + assert_eq!(base_ref.unwrap(), obj); + + let ptr = obj.to_raw_address() + OBJECT_SIZE; + let base_ref = crate::memory_manager::find_object_from_internal_pointer::( + ptr, + OBJECT_SIZE, + ); + assert!(base_ref.is_none()); + }, + no_cleanup, + ) +} diff --git a/src/vm/tests/mock_tests/mock_test_internal_ptr_normal_object.rs b/src/vm/tests/mock_tests/mock_test_internal_ptr_normal_object.rs new file mode 100644 index 0000000000..fdfddd6c6c --- /dev/null +++ b/src/vm/tests/mock_tests/mock_test_internal_ptr_normal_object.rs @@ -0,0 +1,92 @@ +// GITHUB-CI: MMTK_PLAN=all +// GITHUB-CI: FEATURES=is_mmtk_object + +use super::mock_test_prelude::*; + +use crate::util::*; +use crate::AllocationSemantics; + +#[test] +pub fn interior_pointer_in_normal_object() { + const MB: usize = 1024 * 1024; + const OBJECT_SIZE: usize = 16; + with_mockvm( + || -> MockVM { + MockVM { + get_object_size: MockMethod::new_fixed(Box::new(|_| OBJECT_SIZE)), + ..MockVM::default() + } + }, + || { + let mut fixture = MutatorFixture::create_with_heapsize(10 * MB); + + let mut test_obj = || { + let addr = memory_manager::alloc( + &mut fixture.mutator, + OBJECT_SIZE, + 8, + 0, + AllocationSemantics::Default, + ); + assert!(!addr.is_zero()); + + let obj = MockVM::object_start_to_ref(addr); + println!( + "start = {}, end = {}, obj = {}, in-obj addr = {}", + addr, + addr + OBJECT_SIZE, + obj, + obj.to_address::() + ); + memory_manager::post_alloc( + &mut fixture.mutator, + obj, + OBJECT_SIZE, + AllocationSemantics::Default, + ); + + let test_internal_ptr = |ptr: Address| { + if ptr >= addr + OBJECT_SIZE { + println!("ptr = {}, not internal pointer", ptr); + // not internal pointer + let base_ref = crate::memory_manager::find_object_from_internal_pointer::< + MockVM, + >(ptr, usize::MAX); + println!("{:?}", base_ref); + assert!(base_ref.is_none()); + } else { + println!("ptr = {}, internal pointer", ptr); + // is internal pointer + let base_ref = crate::memory_manager::find_object_from_internal_pointer::< + MockVM, + >(ptr, usize::MAX); + assert!(base_ref.is_some()); + assert_eq!(base_ref.unwrap(), obj); + } + }; + + let base_ref = crate::memory_manager::find_object_from_internal_pointer::( + obj.to_raw_address(), + OBJECT_SIZE, + ); + assert!(base_ref.is_some()); + assert_eq!(base_ref.unwrap(), obj); + + // Test with the first 16 bytes as offset in the object + for offset in 0..16usize { + let ptr = obj.to_raw_address() + offset; + 
test_internal_ptr(ptr); + } + + // Test with the first 16 bytes after the object size + for offset in OBJECT_SIZE..(OBJECT_SIZE + 16) { + let ptr = obj.to_raw_address() + offset; + test_internal_ptr(ptr); + } + }; + + test_obj(); + }, + no_cleanup, + ) +} diff --git a/src/vm/tests/mock_tests/mod.rs b/src/vm/tests/mock_tests/mod.rs index 0ccabb40d5..751bc58c08 100644 --- a/src/vm/tests/mock_tests/mod.rs +++ b/src/vm/tests/mock_tests/mod.rs @@ -10,6 +10,9 @@ // Mock tests can be placed anywhere in the source directory `src` or the test directory `tests`. // * They need to be conditional compiled when the feature `mock_test` is enabled. Otherwise they cannot access `MockVM`. // * They should have the prefix 'mock_test_' in their file name so they will be picked up by the CI testing scripts. +// * The file name of one test cannot be a prefix of the name of another test. E.g. `mock_test_file` and `mock_test_file_large` are not allowed, +// as they share the same prefix `mock_test_file`. If we run `cargo test mock_test_file`, both would be executed, which causes failures, +// because we have to run each test separately. It is recommended to name such tests `mock_test_file_small` and `mock_test_file_large`. // Common includes for mock tests. pub(crate) mod mock_test_prelude { @@ -33,6 +36,16 @@ mod mock_test_conservatism; mod mock_test_handle_mmap_conflict; mod mock_test_handle_mmap_oom; mod mock_test_init_fork; +#[cfg(feature = "is_mmtk_object")] +mod mock_test_internal_ptr_before_object_ref; +#[cfg(feature = "is_mmtk_object")] +mod mock_test_internal_ptr_invalid; +#[cfg(feature = "is_mmtk_object")] +mod mock_test_internal_ptr_large_object_multi_page; +#[cfg(feature = "is_mmtk_object")] +mod mock_test_internal_ptr_large_object_same_page; +#[cfg(feature = "is_mmtk_object")] +mod mock_test_internal_ptr_normal_object; mod mock_test_is_in_mmtk_spaces; mod mock_test_issue139_allocate_non_multiple_of_min_alignment; mod mock_test_issue867_allocate_unrealistically_large_object;
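
As a rough orientation for binding authors reading this patch, below is a minimal, hypothetical sketch (not part of the diff) of how a binding might combine the `Option`-returning `is_mmtk_object` with the new `find_object_from_internal_pointer` when filtering conservative stack words. The function name `trace_conservative_word`, the `MAX_OBJECT_SIZE` bound, and the `roots` vector are illustrative assumptions; both calls require the `is_mmtk_object` cargo feature, and candidate words are assumed to be pre-filtered (non-zero, word-aligned, in a plausible range).

```rust
// A minimal sketch, assuming a binding-defined upper bound on object size and a
// pre-filtered candidate word. Requires the `is_mmtk_object` cargo feature.
use mmtk::memory_manager;
use mmtk::util::{Address, ObjectReference};
use mmtk::vm::VMBinding;

/// Hypothetical upper bound on object size, used to limit the backward search
/// for the base object of an internal pointer.
const MAX_OBJECT_SIZE: usize = 1 << 20;

fn trace_conservative_word<VM: VMBinding>(word: Address, roots: &mut Vec<ObjectReference>) {
    // Exact reference: `is_mmtk_object` now returns the object reference directly.
    if let Some(obj) = memory_manager::is_mmtk_object(word) {
        roots.push(obj);
        return;
    }
    // Possible internal pointer: search backwards for the base object reference.
    if let Some(obj) =
        memory_manager::find_object_from_internal_pointer::<VM>(word, MAX_OBJECT_SIZE)
    {
        roots.push(obj);
    }
}
```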