Skip to content

Commit

Permalink
rb_shape_transition_shape_capa: use optimal sizes transitions
Browse files Browse the repository at this point in the history
Previously the growth was 3(embed), 6, 12, 24, ...

With this change it's now 3(embed), 8, 16, 32, 64, ... by default.

However, since a power of two isn't the best size for all allocators,
if `malloc_usable_size` is available, we use it to discover the best
offset.

On Linux/glibc 2.35 for instance, the growth will be 3(embed), 7, 15, 31
to avoid wasting 8B per object.

Test program:

```c

/* Allocates room for `slots` values of VALUE_SIZE bytes, prints to stderr how
 * many usable bytes the allocator returned beyond the request, and returns
 * that number of wasted bytes. */
size_t test(size_t slots) {
    size_t allocated = slots * VALUE_SIZE;
    void *test_ptr = malloc(allocated);
    size_t wasted = malloc_usable_size(test_ptr) - allocated;
    free(test_ptr);
    /* size_t is printed with %zu; %lu only matches where size_t happens to be
     * unsigned long. */
    fprintf(stderr, "slots = %zu, wasted_bytes = %zu\n", slots, wasted);
    return wasted;
}

/* Finds the smallest padding (0 to 2 slots) for which an 8-slot request wastes
 * no bytes, then reports the waste for ten doubling sizes starting at 4 slots:
 * first as-is ("naive"), then with that padding subtracted ("auto-padded"). */
int main(int argc, char *argv[]) {
    size_t best_padding = 0;
    for (size_t padding = 0; padding <= 2; padding++) {
        size_t wasted = test(8 - padding);
        if (wasted == 0) {
            best_padding = padding;
            break;
        }
    }

    fprintf(stderr, "=============== naive ================\n");

    size_t list_size = 4;
    for (size_t index = 0; index < 10; index++) {
        test(list_size);
        list_size *= 2;
    }

    /* best_padding is a size_t, so it is printed with %zu rather than %lu. */
    fprintf(stderr, "=============== auto-padded (-%zu) ================\n", best_padding);

    list_size = 4;
    for (size_t index = 0; index < 10; index++) {
        test(list_size - best_padding);
        list_size *= 2;
    }

    fprintf(stderr, "\n\n");
    return 0;
}
```

```
===== glibc ======
slots = 8, wasted_bytes = 8
slots = 7, wasted_bytes = 0
=============== naive ================
slots = 4, wasted_bytes = 8
slots = 8, wasted_bytes = 8
slots = 16, wasted_bytes = 8
slots = 32, wasted_bytes = 8
slots = 64, wasted_bytes = 8
slots = 128, wasted_bytes = 8
slots = 256, wasted_bytes = 8
slots = 512, wasted_bytes = 8
slots = 1024, wasted_bytes = 8
slots = 2048, wasted_bytes = 8
=============== auto-padded (-1) ================
slots = 3, wasted_bytes = 0
slots = 7, wasted_bytes = 0
slots = 15, wasted_bytes = 0
slots = 31, wasted_bytes = 0
slots = 63, wasted_bytes = 0
slots = 127, wasted_bytes = 0
slots = 255, wasted_bytes = 0
slots = 511, wasted_bytes = 0
slots = 1023, wasted_bytes = 0
slots = 2047, wasted_bytes = 0
```

```
==========  jemalloc =======
slots = 8, wasted_bytes = 0
=============== naive ================
slots = 4, wasted_bytes = 0
slots = 8, wasted_bytes = 0
slots = 16, wasted_bytes = 0
slots = 32, wasted_bytes = 0
slots = 64, wasted_bytes = 0
slots = 128, wasted_bytes = 0
slots = 256, wasted_bytes = 0
slots = 512, wasted_bytes = 0
slots = 1024, wasted_bytes = 0
slots = 2048, wasted_bytes = 0
=============== auto-padded (-0) ================
slots = 4, wasted_bytes = 0
slots = 8, wasted_bytes = 0
slots = 16, wasted_bytes = 0
slots = 32, wasted_bytes = 0
slots = 64, wasted_bytes = 0
slots = 128, wasted_bytes = 0
slots = 256, wasted_bytes = 0
slots = 512, wasted_bytes = 0
slots = 1024, wasted_bytes = 0
slots = 2048, wasted_bytes = 0
```
  • Loading branch information
byroot committed Oct 13, 2023
1 parent 842d9c0 commit c87e397
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 4 deletions.
2 changes: 1 addition & 1 deletion ext/objspace/objspace_dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ shape_i(rb_shape_t *shape, void *data)
dump_append(dc, "\"OBJ_TOO_COMPLEX\"");
break;
default:
rb_bug("[objspace] unexpected shape type");
rb_bug("[objspace] unexpected shape type: %u", shape->type);
}

dump_append(dc, ", \"edges\":");
Expand Down
47 changes: 47 additions & 0 deletions gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,51 @@
#define MAP_ANONYMOUS MAP_ANON
#endif


// Number of bytes rb_malloc_grow_capa subtracts from the power-of-two byte size
// it computes. Assigned in Init_GC from gc_compute_malloc_offset().
static size_t malloc_offset = 0;
#ifdef HAVE_MALLOC_USABLE_SIZE
static size_t
gc_compute_malloc_offset(void)
{
    // Allocators differ in how they store their metadata, so the request size
    // that leaves no slack differs between them.
    // For instance malloc(64) wastes 8B with glibc but 0B with jemalloc, whereas
    // malloc(56) wastes 0B with glibc but 8B with jemalloc.
    // We therefore probe requests of 64, 56 and 48 bytes and keep the first
    // padding for which the usable size equals the requested size.
    // This was tested on Linux with glibc 2.35 and jemalloc 5; for both it
    // results in no wasted memory.
    const size_t probe_base = 64;
    size_t padding = 0;

    while (padding <= 16) {
        size_t requested = probe_base - padding;
        void *probe = malloc(requested);
        size_t usable = malloc_usable_size(probe);
        free(probe);

        if (usable == requested) {
            return padding;
        }
        padding += 8;
    }

    // None of the probed sizes was exact: apply no offset.
    return 0;
}
#else
static size_t
gc_compute_malloc_offset(void)
{
    // Without malloc_usable_size the allocator cannot be probed, so no offset
    // is applied and sizes stay on powers of 2.
    const size_t no_offset = 0;
    return no_offset;
}
#endif

// Returns the capacity, in elements of `type_size` bytes, that a buffer
// currently holding `current_capacity` elements should grow to.
//
// The current byte size is doubled and rounded up to a power of two, then
// reduced by `malloc_offset` so the request matches what the allocator hands
// out without slack. The result is always greater than `current_capacity`.
//
// `type_size` must be non-zero. `current_capacity * type_size * 2` is assumed
// to fit in a size_t; that is not checked here.
size_t
rb_malloc_grow_capa(size_t current_capacity, size_t type_size)
{
    // An empty buffer grows as if it already held one element: doubling zero
    // bytes would leave nothing to round up.
    size_t base_capacity = current_capacity > 0 ? current_capacity : 1;
    size_t doubled_bytes = base_capacity * type_size * 2;

    // Round up to a power of two entirely in size_t arithmetic. Shifting the
    // `int` literal 1 overflows once the shift count reaches 31, so the shift
    // is performed on a size_t instead.
    size_t rounded_bytes = 1;
    while (rounded_bytes < doubled_bytes) {
        size_t next_bytes = rounded_bytes << 1;
        if (next_bytes == 0) {
            // The top bit was shifted out: no larger power of two fits.
            break;
        }
        rounded_bytes = next_bytes;
    }

    // Apply the allocator offset only when it still yields growth. For tiny
    // buffers the offset can exceed the rounded size (which would wrap around)
    // or cancel out the whole gain.
    if (rounded_bytes > malloc_offset) {
        size_t padded_capacity = (rounded_bytes - malloc_offset) / type_size;
        if (padded_capacity > current_capacity) {
            return padded_capacity;
        }
    }
    return rounded_bytes / type_size;
}

static inline struct rbimpl_size_mul_overflow_tag
size_add_overflow(size_t x, size_t y)
{
Expand Down Expand Up @@ -13984,6 +14029,8 @@ void
Init_GC(void)
{
#undef rb_intern
malloc_offset = gc_compute_malloc_offset();

VALUE rb_mObjSpace;
VALUE rb_mProfiler;
VALUE gc_constants;
Expand Down
1 change: 1 addition & 0 deletions internal/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ __attribute__((__alloc_align__(1)))
RUBY_ATTR_MALLOC void *rb_aligned_malloc(size_t, size_t) RUBY_ATTR_ALLOC_SIZE((2));
size_t rb_size_mul_or_raise(size_t, size_t, VALUE); /* used in compile.c */
size_t rb_size_mul_add_or_raise(size_t, size_t, size_t, VALUE); /* used in iseq.h */
size_t rb_malloc_grow_capa(size_t current_capacity, size_t type_size);
RUBY_ATTR_MALLOC void *rb_xmalloc_mul_add(size_t, size_t, size_t);
RUBY_ATTR_MALLOC void *rb_xcalloc_mul_add(size_t, size_t, size_t);
void *rb_xrealloc_mul_add(const void *, size_t, size_t, size_t);
Expand Down
8 changes: 5 additions & 3 deletions shape.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,10 @@ rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id)
}

static inline rb_shape_t *
rb_shape_transition_shape_capa_create(rb_shape_t* shape, uint32_t new_capacity)
rb_shape_transition_shape_capa_create(rb_shape_t* shape, size_t new_capacity)
{
RUBY_ASSERT(new_capacity < (size_t)MAX_IVARS);

ID edge_name = rb_make_temporary_id(new_capacity);
bool dont_care;
rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care, true, false);
Expand All @@ -430,7 +432,7 @@ rb_shape_transition_shape_capa_create(rb_shape_t* shape, uint32_t new_capacity)
rb_shape_t *
rb_shape_transition_shape_capa(rb_shape_t* shape)
{
return rb_shape_transition_shape_capa_create(shape, shape->capacity * 2);
return rb_shape_transition_shape_capa_create(shape, rb_malloc_grow_capa(shape->capacity, sizeof(VALUE)));
}

bool
Expand Down Expand Up @@ -833,7 +835,7 @@ Init_default_shapes(void)

// Shapes by size pool
for (int i = 1; i < SIZE_POOL_COUNT; i++) {
uint32_t capa = (uint32_t)((rb_size_pool_slot_size(i) - offsetof(struct RObject, as.ary)) / sizeof(VALUE));
size_t capa = ((rb_size_pool_slot_size(i) - offsetof(struct RObject, as.ary)) / sizeof(VALUE));
rb_shape_t * new_shape = rb_shape_transition_shape_capa_create(root, capa);
new_shape->type = SHAPE_INITIAL_CAPACITY;
new_shape->size_pool_index = i;
Expand Down

0 comments on commit c87e397

Please sign in to comment.