From 2ea6d358efbf388a22868c192e9b44e483773d10 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Thu, 13 Jul 2023 10:15:48 +0100 Subject: [PATCH] Make gc_mark_ptr have a single responsibility Currently the GC's marking functions functions (gc_mark, gc_mark_ptr, gc_mark_children) are responsible for both marking and object tracing. This becomes awkward when we need to use object tracing for non-GC related purposes (such as dumping the heap). The current implementation relies on `gc_mark_ptr` having multiple responsibilites depending on whether it's being called during GC or during mutator work - it switches based on `objspace->flags.during_gc`. If that flag is set it runs marking, and if not it passes the pointer in question to `reachable_objects_from_callback`, which then looks at the current ractor for a function pointer to execute with the pointer as an argument. This PR removes the conditional from `gc_mark_ptr`. Making it unconditionally mark the pointer. It then pre-configures each ractor to use `gc_mark_ptr` as it's default mark function pointer, and replaces previous calls to `gc_mark_ptr` with calls to `reachable_objects_from_callback` - essentially unifying the two approaces to object tracing - In order to walk the object graph we now need to do the same steps: 1. make sure there is a function pointer attached to the ractor 2. use the iterator function `reachable_objects_from_callback` One of the challenges involved in walking the object graph, is that we cannot change the api for marking T_DATA objects. The nice thing about the original approach, is that we didn't have to - all marking callbacks in C extension objects will eventually use gc_mark_ptr - because it's used by `rb_gc_mark` which is part of the public API. This new approach also doesn't require changing the public API. We implement `rb_gc_mark` on top of `reachable_objects_from_callback` and no user facing changes have to be made to any code in C extensions. This new approach is the first step in separating gc marking from object graph traversal. This is necessary for us to start implementing alternate GC's so that we can implement custom marking and tracing functions independantly. --- gc.c | 70 ++++++++++++++++++++++++++++----------------------- internal/gc.h | 8 ++++++ ractor.c | 3 ++- ractor_core.h | 5 +--- 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/gc.c b/gc.c index 3c9adf3b007384..03e61dd4d56165 100644 --- a/gc.c +++ b/gc.c @@ -6842,37 +6842,33 @@ gc_aging(rb_objspace_t *objspace, VALUE obj) objspace->marked_slots++; } -NOINLINE(static void gc_mark_ptr(rb_objspace_t *objspace, VALUE obj)); +NOINLINE(static void gc_mark_ptr(VALUE obj, void *data)); static void reachable_objects_from_callback(VALUE obj); static void -gc_mark_ptr(rb_objspace_t *objspace, VALUE obj) +gc_mark_ptr(VALUE obj, void *data) { - if (LIKELY(during_gc)) { - rgengc_check_relation(objspace, obj); - if (!gc_mark_set(objspace, obj)) return; /* already marked */ + rb_objspace_t *objspace = (rb_objspace_t *)data; + rgengc_check_relation(objspace, obj); + if (!gc_mark_set(objspace, obj)) return; /* already marked */ - if (0) { // for debug GC marking miss - if (objspace->rgengc.parent_object) { - RUBY_DEBUG_LOG("%p (%s) parent:%p (%s)", - (void *)obj, obj_type_name(obj), - (void *)objspace->rgengc.parent_object, obj_type_name(objspace->rgengc.parent_object)); - } - else { - RUBY_DEBUG_LOG("%p (%s)", (void *)obj, obj_type_name(obj)); - } + if (0) { // for debug GC marking miss + if (objspace->rgengc.parent_object) { + RUBY_DEBUG_LOG("%p (%s) parent:%p (%s)", + (void *)obj, obj_type_name(obj), + (void *)objspace->rgengc.parent_object, obj_type_name(objspace->rgengc.parent_object)); } - - if (UNLIKELY(RB_TYPE_P(obj, T_NONE))) { - rp(obj); - rb_bug("try to mark T_NONE object"); /* check here will help debugging */ + else { + RUBY_DEBUG_LOG("%p (%s)", (void *)obj, obj_type_name(obj)); } - gc_aging(objspace, obj); - gc_grey(objspace, obj); } - else { - reachable_objects_from_callback(obj); + + if (UNLIKELY(RB_TYPE_P(obj, T_NONE))) { + rp(obj); + rb_bug("try to mark T_NONE object"); /* check here will help debugging */ } + gc_aging(objspace, obj); + gc_grey(objspace, obj); } static inline void @@ -6891,14 +6887,14 @@ gc_mark_and_pin(rb_objspace_t *objspace, VALUE obj) { if (!is_markable_object(obj)) return; gc_pin(objspace, obj); - gc_mark_ptr(objspace, obj); + reachable_objects_from_callback(obj); } static inline void gc_mark(rb_objspace_t *objspace, VALUE obj) { if (!is_markable_object(obj)) return; - gc_mark_ptr(objspace, obj); + reachable_objects_from_callback(obj); } void @@ -6926,7 +6922,7 @@ rb_gc_mark_and_move(VALUE *ptr) *ptr = rb_gc_location(*ptr); } else { - gc_mark_ptr(objspace, *ptr); + reachable_objects_from_callback(*ptr); } } @@ -8536,7 +8532,6 @@ gc_marks(rb_objspace_t *objspace, int full_mark) bool marking_finished = false; /* setup marking */ - gc_marks_start(objspace, full_mark); if (!is_incremental_marking(objspace)) { gc_marks_rest(objspace); @@ -9210,6 +9205,10 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) /* reason may be clobbered, later, so keep set immediate_sweep here */ objspace->flags.immediate_sweep = !!(reason & GPR_FLAG_IMMEDIATE_SWEEP); + rb_ractor_t *cr = GET_RACTOR(); + struct gc_mark_func_data_struct prev_mfd = cr->mfd; + rb_ractor_init_mfd(cr); + /* Explicitly enable compaction (GC.compact) */ if (do_full_mark && ruby_enable_autocompact) { objspace->flags.during_compacting = TRUE; @@ -9315,6 +9314,8 @@ gc_start(rb_objspace_t *objspace, unsigned int reason) gc_prof_timer_stop(objspace); gc_exit(objspace, gc_enter_event_start, &lock_lev); + + cr->mfd = prev_mfd; return TRUE; } @@ -11671,7 +11672,7 @@ static void reachable_objects_from_callback(VALUE obj) { rb_ractor_t *cr = GET_RACTOR(); - cr->mfd->mark_func(obj, cr->mfd->data); + cr->mfd.mark_func(obj, cr->mfd.data); } void @@ -11688,9 +11689,9 @@ rb_objspace_reachable_objects_from(VALUE obj, void (func)(VALUE, void *), void * struct gc_mark_func_data_struct mfd = { .mark_func = func, .data = data, - }, *prev_mfd = cr->mfd; + }, prev_mfd = cr->mfd; - cr->mfd = &mfd; + cr->mfd = mfd; gc_mark_children(objspace, obj); cr->mfd = prev_mfd; } @@ -11731,9 +11732,9 @@ objspace_reachable_objects_from_root(rb_objspace_t *objspace, void (func)(const struct gc_mark_func_data_struct mfd = { .mark_func = root_objects_from, .data = &data, - }, *prev_mfd = cr->mfd; + }, prev_mfd = cr->mfd; - cr->mfd = &mfd; + cr->mfd = mfd; gc_mark_roots(objspace, &data.category); cr->mfd = prev_mfd; } @@ -13770,6 +13771,13 @@ rb_gcdebug_remove_stress_to_class(int argc, VALUE *argv, VALUE self) return Qnil; } +void +rb_ractor_init_mfd(rb_ractor_t *r) +{ + r->mfd.mark_func = gc_mark_ptr; + r->mfd.data = (void *)&rb_objspace; +} + /* * Document-module: ObjectSpace * diff --git a/internal/gc.h b/internal/gc.h index 29344224744e1d..44808645d00774 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -125,6 +125,13 @@ const char *rb_raw_obj_info(char *const buff, const size_t buff_size, VALUE obj) size_t rb_size_pool_slot_size(unsigned char pool_id); +typedef void (*rb_ractor_value_visitor_func)(VALUE v, void *data); + +struct gc_mark_func_data_struct { + void *data; + rb_ractor_value_visitor_func mark_func; +}; + struct rb_execution_context_struct; /* in vm_core.h */ struct rb_objspace; /* in vm_core.h */ @@ -234,6 +241,7 @@ VALUE rb_gc_id2ref_obj_tbl(VALUE objid); VALUE rb_define_finalizer_no_check(VALUE obj, VALUE block); void rb_gc_mark_and_move(VALUE *ptr); +void rb_ractor_init_mfd(rb_ractor_t *r); #define rb_gc_mark_and_move_ptr(ptr) do { \ VALUE _obj = (VALUE)*(ptr); \ diff --git a/ractor.c b/ractor.c index e0919ac56b1804..165200c5879d51 100644 --- a/ractor.c +++ b/ractor.c @@ -1903,6 +1903,7 @@ ractor_alloc(VALUE klass) VALUE rv = TypedData_Make_Struct(klass, rb_ractor_t, &ractor_data_type, r); FL_SET_RAW(rv, RUBY_FL_SHAREABLE); r->pub.self = rv; + rb_ractor_init_mfd(r); VM_ASSERT(ractor_status_p(r, ractor_created)); return rv; } @@ -1921,7 +1922,7 @@ rb_ractor_main_alloc(void) r->name = Qnil; r->pub.self = Qnil; ruby_single_main_ractor = r; - + rb_ractor_init_mfd(r); return r; } diff --git a/ractor_core.h b/ractor_core.h index 38aded15074bef..4645c058c3e110 100644 --- a/ractor_core.h +++ b/ractor_core.h @@ -186,10 +186,7 @@ struct rb_ractor_struct { rb_ractor_newobj_cache_t newobj_cache; // gc.c rb_objspace_reachable_objects_from - struct gc_mark_func_data_struct { - void *data; - void (*mark_func)(VALUE v, void *data); - } *mfd; + struct gc_mark_func_data_struct mfd; }; // rb_ractor_t is defined in vm_core.h