Skip to content

Commit 5edfd85

Browse files
authored
Internal pointer support (#1165)
This PR adds internal pointer support. It supersedes #1155 which provides a simple but inefficient implementation for internal pointers. This PR is based on #1159 which adds requirements for object reference alignment. This PR * adds `memory_manager::find_object_from_internal_pointer` * The call is dispatched using SFT to each space. * Large object space only checks the first word in VO bit for every page. * Mark sweep and immix space only searches for the max object size for those spaces. * Allow iterating side metadata bits. * Allow loading raw byte/word in side metadata.
1 parent eb919f2 commit 5edfd85

30 files changed

+1503
-152
lines changed

benches/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ pub fn bench_main(_c: &mut Criterion) {
2727
match std::env::var("MMTK_BENCH") {
2828
Ok(bench) => match bench.as_str() {
2929
"alloc" => mock_bench::alloc::bench(_c),
30+
"internal_pointer" => mock_bench::internal_pointer::bench(_c),
3031
"sft" => mock_bench::sft::bench(_c),
3132
_ => panic!("Unknown benchmark {:?}", bench),
3233
},
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
use criterion::Criterion;
2+
3+
#[cfg(feature = "is_mmtk_object")]
4+
use mmtk::util::test_util::fixtures::*;
5+
use mmtk::util::test_util::mock_method::*;
6+
use mmtk::util::test_util::mock_vm::{write_mockvm, MockVM};
7+
8+
pub fn bench(c: &mut Criterion) {
9+
// Setting a larger heap, although the GC should be disabled in the MockVM
10+
#[cfg(feature = "is_mmtk_object")]
11+
let mut fixture = MutatorFixture::create_with_heapsize(1 << 30);
12+
13+
// Normal objects
14+
// 16KB object -- we want to make sure the object can fit into any normal space (e.g. immix space or mark sweep space)
15+
const NORMAL_OBJECT_SIZE: usize = 16 * 1024;
16+
write_mockvm(|mock| {
17+
*mock = MockVM {
18+
get_object_size: MockMethod::new_fixed(Box::new(|_| NORMAL_OBJECT_SIZE)),
19+
is_collection_enabled: MockMethod::new_fixed(Box::new(|_| false)),
20+
..MockVM::default()
21+
}
22+
});
23+
24+
c.bench_function("internal pointer - normal objects", |_b| {
25+
#[cfg(feature = "is_mmtk_object")]
26+
{
27+
use mmtk::memory_manager;
28+
use mmtk::AllocationSemantics;
29+
let addr = memory_manager::alloc(
30+
&mut fixture.mutator,
31+
NORMAL_OBJECT_SIZE,
32+
8,
33+
0,
34+
AllocationSemantics::Default,
35+
);
36+
let obj_ref = MockVM::object_start_to_ref(addr);
37+
memory_manager::post_alloc(
38+
&mut fixture.mutator,
39+
obj_ref,
40+
NORMAL_OBJECT_SIZE,
41+
AllocationSemantics::Default,
42+
);
43+
let obj_end = addr + NORMAL_OBJECT_SIZE;
44+
_b.iter(|| {
45+
memory_manager::find_object_from_internal_pointer::<MockVM>(
46+
obj_end - 1,
47+
NORMAL_OBJECT_SIZE,
48+
);
49+
})
50+
}
51+
#[cfg(not(feature = "is_mmtk_object"))]
52+
panic!("The benchmark requires is_mmtk_object feature to run");
53+
});
54+
55+
// Large objects
56+
// 16KB object
57+
const LARGE_OBJECT_SIZE: usize = 16 * 1024;
58+
write_mockvm(|mock| {
59+
*mock = MockVM {
60+
get_object_size: MockMethod::new_fixed(Box::new(|_| LARGE_OBJECT_SIZE)),
61+
is_collection_enabled: MockMethod::new_fixed(Box::new(|_| false)),
62+
..MockVM::default()
63+
}
64+
});
65+
c.bench_function("internal pointer - large objects", |_b| {
66+
#[cfg(feature = "is_mmtk_object")]
67+
{
68+
use mmtk::memory_manager;
69+
use mmtk::AllocationSemantics;
70+
let addr = memory_manager::alloc(
71+
&mut fixture.mutator,
72+
LARGE_OBJECT_SIZE,
73+
8,
74+
0,
75+
AllocationSemantics::Los,
76+
);
77+
let obj_ref = MockVM::object_start_to_ref(addr);
78+
memory_manager::post_alloc(
79+
&mut fixture.mutator,
80+
obj_ref,
81+
LARGE_OBJECT_SIZE,
82+
AllocationSemantics::Los,
83+
);
84+
let obj_end = addr + LARGE_OBJECT_SIZE;
85+
_b.iter(|| {
86+
memory_manager::find_object_from_internal_pointer::<MockVM>(
87+
obj_end - 1,
88+
LARGE_OBJECT_SIZE,
89+
);
90+
})
91+
}
92+
#[cfg(not(feature = "is_mmtk_object"))]
93+
panic!("The benchmark requires is_mmtk_object feature to run");
94+
});
95+
}

benches/mock_bench/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
pub mod alloc;
2+
pub mod internal_pointer;
23
pub mod sft;

docs/userguide/src/migration/prefix.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,22 @@ Notes for the mmtk-core developers:
3232

3333
## 0.27.0
3434

35+
### `is_mmtk_object` returns `Option<ObjectReference>`
36+
37+
```admonish tldr
38+
`memory_manager::is_mmtk_object` now returns `Option<ObjectReference>` instead of `bool`.
39+
Bindings can use the returned object reference instead of computing the object reference at the binding side.
40+
```
41+
42+
API changes:
43+
* module `memory_manager`
44+
- `is_mmtk_object` now returns `Option<ObjectReference>`.
45+
46+
See also:
47+
48+
- PR: <https://github.com/mmtk/mmtk-core/pull/1165>
49+
- Example: <https://github.com/mmtk/mmtk-ruby/pull/86>
50+
3551
### Introduce `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`
3652

3753
```admonish tldr
@@ -49,6 +65,11 @@ API changes:
4965
- Add a constant `ALIGNMENT` which is equal to the word size. All object references should be at least aligned
5066
to the word size. This is checked in debug builds when an `ObjectReference` is constructed.
5167

68+
See also:
69+
70+
- PR: <https://github.com/mmtk/mmtk-core/pull/1159>
71+
- Example: <https://github.com/mmtk/mmtk-openjdk/pull/283>
72+
5273
## 0.26.0
5374

5475
### Rename "edge" to "slot"

src/memory_manager.rs

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -588,40 +588,56 @@ pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
588588
/// Concretely:
589589
/// 1. Return true if `ObjectReference::from_raw_address(addr)` is a valid object reference to an
590590
/// object in any space in MMTk.
591-
/// 2. Also return true if there exists an `objref: ObjectReference` such that
592-
/// - `objref` is a valid object reference to an object in any space in MMTk, and
593-
/// - `lo <= objref.to_address() < hi`, where
594-
/// - `lo = addr.align_down(VO_BIT_REGION_SIZE)` and
595-
/// - `hi = lo + VO_BIT_REGION_SIZE` and
596-
/// - `VO_BIT_REGION_SIZE` is [`crate::util::is_mmtk_object::VO_BIT_REGION_SIZE`].
597-
/// It is the byte granularity of the valid object (VO) bit.
598-
/// 3. Return false otherwise. This function never panics.
599-
///
600-
/// This function uses the "valid object (VO) bits" side metadata, i.e. a bitmap.
601-
/// For space efficiency, each bit of the bitmap governs a small region of memory.
602-
/// The size of a region is currently defined as the [minimum object size](crate::util::constants::MIN_OBJECT_SIZE),
603-
/// which is currently defined as the [word size](crate::util::constants::BYTES_IN_WORD),
604-
/// which is 4 bytes on 32-bit systems or 8 bytes on 64-bit systems.
605-
/// The alignment of a region is also the region size.
606-
/// If a VO bit is `1`, the bitmap cannot tell which address within the 4-byte or 8-byte region
607-
/// is the valid object reference.
608-
/// Therefore, if this method returns true, the binding can compute the object reference by
609-
/// aligning the address to [`crate::util::ObjectReference::ALIGNMENT`].
591+
/// 2. Return false otherwise.
610592
///
611593
/// This function is useful for conservative root scanning. The VM can iterate through all words in
612594
/// a stack, filter out zeros, misaligned words, obviously out-of-range words (such as addresses
613595
/// greater than `0x0000_7fff_ffff_ffff` on Linux on x86_64), and use this function to decide if the
614596
/// word is really a reference.
615597
///
598+
/// This function does not handle internal pointers. If a binding may have internal pointers on
599+
/// the stack, and requires identifying the base reference for an internal pointer, they should use
600+
/// [`find_object_from_internal_pointer`] instead.
601+
///
616602
/// Note: This function has special behaviors if the VM space (enabled by the `vm_space` feature)
617603
/// is present. See `crate::plan::global::BasePlan::vm_space`.
618604
///
619605
/// Argument:
620606
/// * `addr`: An arbitrary address.
621607
#[cfg(feature = "is_mmtk_object")]
622-
pub fn is_mmtk_object(addr: Address) -> bool {
623-
use crate::mmtk::SFT_MAP;
624-
SFT_MAP.get_checked(addr).is_mmtk_object(addr)
608+
pub fn is_mmtk_object(addr: Address) -> Option<ObjectReference> {
609+
crate::util::is_mmtk_object::check_object_reference(addr)
610+
}
611+
612+
/// Find if there is an object with VO bit set for the given address range.
613+
/// This should be used instead of [`crate::memory_manager::is_mmtk_object`] for conservative stack scanning if
614+
/// the binding may have internal pointers on the stack.
615+
///
616+
/// Note that, we only consider pointers that point to addresses that are equal to or greater than the in-object address
617+
/// (i.e. [`crate::util::ObjectReference::to_address()`] which is the same as `object_ref.to_raw_address() + ObjectModel::IN_OBJECT_ADDRESS_OFFSET`),
618+
/// and within the allocation as 'internal pointers'. To be precise, for each object ref `obj_ref`, internal pointers are in the range
619+
/// `[obj_ref + ObjectModel::IN_OBJECT_ADDRESS_OFFSET, ObjectModel::ref_to_object_start(obj_ref) + ObjectModel::get_current_size(obj_ref))`.
620+
/// If a binding defines internal pointers differently, calling this method is undefined behavior.
621+
/// If this is the case for you, please submit an issue or engage us on Zulip to discuss more.
622+
///
623+
/// Note that, as with [`crate::memory_manager::is_mmtk_object`], the binding should filter
624+
/// out obvious non-pointers (e.g. alignment check, bound check, etc) before calling this function to avoid unnecessary
625+
/// cost. This method is not cheap.
626+
///
627+
/// To minimize the cost, the user should also use a small `max_search_bytes`.
628+
///
629+
/// Note: This function has special behaviors if the VM space (enabled by the `vm_space` feature)
630+
/// is present. See `crate::plan::global::BasePlan::vm_space`.
631+
///
632+
/// Argument:
633+
/// * `internal_ptr`: The address to start searching. We search backwards from this address (including this address) to find the base reference.
634+
/// * `max_search_bytes`: The maximum number of bytes we may search for an object with VO bit set. `internal_ptr - max_search_bytes` is not included.
635+
#[cfg(feature = "is_mmtk_object")]
636+
pub fn find_object_from_internal_pointer<VM: VMBinding>(
637+
internal_ptr: Address,
638+
max_search_bytes: usize,
639+
) -> Option<ObjectReference> {
640+
crate::util::is_mmtk_object::check_internal_reference(internal_ptr, max_search_bytes)
625641
}
626642

627643
/// Return true if the `object` lies in a region of memory where

src/plan/global.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,9 +355,10 @@ pub struct BasePlan<VM: VMBinding> {
355355
/// If VM space is present, it has some special interaction with the
356356
/// `memory_manager::is_mmtk_object` and the `memory_manager::is_in_mmtk_spaces` functions.
357357
///
358-
/// - The `is_mmtk_object` funciton requires the valid object (VO) bit side metadata to identify objects,
359-
/// but currently we do not require the boot image to provide it, so it will not work if the
360-
/// address argument is in the VM space.
358+
/// - The functions `is_mmtk_object` and `find_object_from_internal_pointer` require
359+
/// the valid object (VO) bit side metadata to identify objects.
360+
/// If the binding maintains the VO bit for objects in VM spaces, those functions will work accordingly.
361+
/// Otherwise, calling them is undefined behavior.
361362
///
362363
/// - The `is_in_mmtk_spaces` currently returns `true` if the given object reference is in
363364
/// the VM space.

src/policy/copyspace.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,20 @@ impl<VM: VMBinding> SFT for CopySpace<VM> {
7474
}
7575

7676
#[cfg(feature = "is_mmtk_object")]
77-
fn is_mmtk_object(&self, addr: Address) -> bool {
78-
crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::<VM>(addr).is_some()
77+
fn is_mmtk_object(&self, addr: Address) -> Option<ObjectReference> {
78+
crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::<VM>(addr)
79+
}
80+
81+
#[cfg(feature = "is_mmtk_object")]
82+
fn find_object_from_internal_pointer(
83+
&self,
84+
ptr: Address,
85+
max_search_bytes: usize,
86+
) -> Option<ObjectReference> {
87+
crate::util::metadata::vo_bit::find_object_from_internal_pointer::<VM>(
88+
ptr,
89+
max_search_bytes,
90+
)
7991
}
8092

8193
fn sft_trace_object(

src/policy/immix/immixspace.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,18 @@ impl<VM: VMBinding> SFT for ImmixSpace<VM> {
141141
crate::util::metadata::vo_bit::set_vo_bit::<VM>(_object);
142142
}
143143
#[cfg(feature = "is_mmtk_object")]
144-
fn is_mmtk_object(&self, addr: Address) -> bool {
145-
crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::<VM>(addr).is_some()
144+
fn is_mmtk_object(&self, addr: Address) -> Option<ObjectReference> {
145+
crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::<VM>(addr)
146+
}
147+
#[cfg(feature = "is_mmtk_object")]
148+
fn find_object_from_internal_pointer(
149+
&self,
150+
ptr: Address,
151+
max_search_bytes: usize,
152+
) -> Option<ObjectReference> {
153+
// We don't need to search more than the max object size in the immix space.
154+
let search_bytes = usize::min(super::MAX_IMMIX_OBJECT_SIZE, max_search_bytes);
155+
crate::util::metadata::vo_bit::find_object_from_internal_pointer::<VM>(ptr, search_bytes)
146156
}
147157
fn sft_trace_object(
148158
&self,

src/policy/immortalspace.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,19 @@ impl<VM: VMBinding> SFT for ImmortalSpace<VM> {
6464
crate::util::metadata::vo_bit::set_vo_bit::<VM>(object);
6565
}
6666
#[cfg(feature = "is_mmtk_object")]
67-
fn is_mmtk_object(&self, addr: Address) -> bool {
68-
crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::<VM>(addr).is_some()
67+
fn is_mmtk_object(&self, addr: Address) -> Option<ObjectReference> {
68+
crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::<VM>(addr)
69+
}
70+
#[cfg(feature = "is_mmtk_object")]
71+
fn find_object_from_internal_pointer(
72+
&self,
73+
ptr: Address,
74+
max_search_bytes: usize,
75+
) -> Option<ObjectReference> {
76+
crate::util::metadata::vo_bit::find_object_from_internal_pointer::<VM>(
77+
ptr,
78+
max_search_bytes,
79+
)
6980
}
7081
fn sft_trace_object(
7182
&self,

src/policy/largeobjectspace.rs

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,65 @@ impl<VM: VMBinding> SFT for LargeObjectSpace<VM> {
8080

8181
#[cfg(feature = "vo_bit")]
8282
crate::util::metadata::vo_bit::set_vo_bit::<VM>(object);
83+
#[cfg(all(feature = "is_mmtk_object", debug_assertions))]
84+
{
85+
use crate::util::constants::LOG_BYTES_IN_PAGE;
86+
let vo_addr = object.to_address::<VM>();
87+
let offset_from_page_start = vo_addr & ((1 << LOG_BYTES_IN_PAGE) - 1) as usize;
88+
debug_assert!(
89+
offset_from_page_start < crate::util::metadata::vo_bit::VO_BIT_WORD_TO_REGION,
90+
"The in-object address is not in the first 512 bytes of a page. The internal pointer searching for LOS won't work."
91+
);
92+
}
93+
8394
self.treadmill.add_to_treadmill(object, alloc);
8495
}
8596
#[cfg(feature = "is_mmtk_object")]
86-
fn is_mmtk_object(&self, addr: Address) -> bool {
87-
crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::<VM>(addr).is_some()
97+
fn is_mmtk_object(&self, addr: Address) -> Option<ObjectReference> {
98+
crate::util::metadata::vo_bit::is_vo_bit_set_for_addr::<VM>(addr)
99+
}
100+
#[cfg(feature = "is_mmtk_object")]
101+
fn find_object_from_internal_pointer(
102+
&self,
103+
ptr: Address,
104+
max_search_bytes: usize,
105+
) -> Option<ObjectReference> {
106+
use crate::util::metadata::vo_bit;
107+
// For large object space, it is a bit special. We only need to check VO bit for each page.
108+
let mut cur_page = ptr.align_down(BYTES_IN_PAGE);
109+
let low_page = ptr
110+
.saturating_sub(max_search_bytes)
111+
.align_down(BYTES_IN_PAGE);
112+
while cur_page >= low_page {
113+
// If the page start is not mapped, there can't be an object in it.
114+
if !cur_page.is_mapped() {
115+
return None;
116+
}
117+
// For performance, we only check the first word which maps to the first 512 bytes in the page.
118+
// In almost all the cases, it should be sufficient.
119+
// However, if the in-object address is not in the first 512 bytes, this won't work.
120+
// We assert this when we set VO bit for LOS.
121+
if vo_bit::get_raw_vo_bit_word(cur_page) != 0 {
122+
// Find the exact address that has vo bit set
123+
for offset in 0..vo_bit::VO_BIT_WORD_TO_REGION {
124+
let addr = cur_page + offset;
125+
if unsafe { vo_bit::is_vo_addr(addr) } {
126+
let obj = vo_bit::is_internal_ptr_from_vo_bit::<VM>(addr, ptr);
127+
if obj.is_some() {
128+
return obj;
129+
} else {
130+
return None;
131+
}
132+
}
133+
}
134+
unreachable!(
135+
"We found vo bit in the raw word, but we cannot find the exact address"
136+
);
137+
}
138+
139+
cur_page -= BYTES_IN_PAGE;
140+
}
141+
None
88142
}
89143
fn sft_trace_object(
90144
&self,

0 commit comments

Comments
 (0)