Skip to content

Commit

Permalink
fix: only consider salient bytes in sharecommon eq, hash (#5840)
Browse files Browse the repository at this point in the history
This PR changes `lean_sharecommon_{eq,hash}` to only consider the
salient bytes of an object, and not any bytes of any
unspecified/uninitialized unused capacity.

Accessing uninitialized storage results in undefined behaviour.

This does not seem to have any semantics disadvantages: If objects
compare equal after this change, their salient bytes are still equal. By
contrast, if the actual identity of allocations needs to be
distinguished, that can be done by just comparing pointers to the
storage.

If we wanted to retain the current logic, we would need initialize the
otherwise unused parts to some specific value to avoid the undefined
behaviour.

Closes #5831
  • Loading branch information
tkoeppe authored Nov 19, 2024
1 parent 69530af commit 91c14c7
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
14 changes: 14 additions & 0 deletions src/include/lean/lean.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,13 @@ static inline unsigned lean_ptr_other(lean_object * o) {
small objects is a multiple of LEAN_OBJECT_SIZE_DELTA */
LEAN_EXPORT size_t lean_object_byte_size(lean_object * o);

/* Returns the size of the salient part of an object's storage,
i.e. the parts that contribute to the value representation;
padding or unused capacity is excluded. Operations that read
from an object's storage must only access these parts, since
the non-salient parts may not be initialized. */
LEAN_EXPORT size_t lean_object_data_byte_size(lean_object * o);

static inline bool lean_is_mt(lean_object * o) {
return o->m_rc < 0;
}
Expand Down Expand Up @@ -695,6 +702,9 @@ static inline size_t lean_array_capacity(b_lean_obj_arg o) { return lean_to_arra
static inline size_t lean_array_byte_size(lean_object * o) {
return sizeof(lean_array_object) + sizeof(void*)*lean_array_capacity(o);
}
static inline size_t lean_array_data_byte_size(lean_object * o) {
return sizeof(lean_array_object) + sizeof(void*)*lean_array_size(o);
}
static inline lean_object ** lean_array_cptr(lean_object * o) { return lean_to_array(o)->m_data; }
static inline void lean_array_set_size(u_lean_obj_arg o, size_t sz) {
assert(lean_is_array(o));
Expand Down Expand Up @@ -852,6 +862,9 @@ static inline size_t lean_sarray_byte_size(lean_object * o) {
return sizeof(lean_sarray_object) + lean_sarray_elem_size(o)*lean_sarray_capacity(o);
}
static inline size_t lean_sarray_size(b_lean_obj_arg o) { return lean_to_sarray(o)->m_size; }
static inline size_t lean_sarray_data_byte_size(lean_object * o) {
return sizeof(lean_sarray_object) + lean_sarray_elem_size(o)*lean_sarray_size(o);
}
static inline void lean_sarray_set_size(u_lean_obj_arg o, size_t sz) {
assert(lean_is_exclusive(o));
assert(sz <= lean_sarray_capacity(o));
Expand Down Expand Up @@ -1013,6 +1026,7 @@ static inline char const * lean_string_cstr(b_lean_obj_arg o) {
}
static inline size_t lean_string_size(b_lean_obj_arg o) { return lean_to_string(o)->m_size; }
static inline size_t lean_string_len(b_lean_obj_arg o) { return lean_to_string(o)->m_length; }
static inline size_t lean_string_data_byte_size(lean_object * o) { return sizeof(lean_string_object) + lean_string_size(o); }
LEAN_EXPORT lean_obj_res lean_string_push(lean_obj_arg s, uint32_t c);
LEAN_EXPORT lean_obj_res lean_string_append(lean_obj_arg s1, b_lean_obj_arg s2);
static inline lean_obj_res lean_string_length(b_lean_obj_arg s) { return lean_box(lean_string_len(s)); }
Expand Down
22 changes: 22 additions & 0 deletions src/runtime/object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,28 @@ extern "C" LEAN_EXPORT size_t lean_object_byte_size(lean_object * o) {
}
}

extern "C" LEAN_EXPORT size_t lean_object_data_byte_size(lean_object * o) {
if (o->m_cs_sz == 0) {
/* Recall that multi-threaded, single-threaded and persistent objects are stored in the heap.
Persistent objects are multi-threaded and/or single-threaded that have been "promoted" to
a persistent status. */
switch (lean_ptr_tag(o)) {
case LeanArray: return lean_array_data_byte_size(o);
case LeanScalarArray: return lean_sarray_data_byte_size(o);
case LeanString: return lean_string_data_byte_size(o);
default: return lean_small_object_size(o);
}
} else {
/* See comment at `lean_set_non_heap_header`, for small objects we store the object size in the RC field. */
switch (lean_ptr_tag(o)) {
case LeanArray: return lean_array_data_byte_size(o);
case LeanScalarArray: return lean_sarray_data_byte_size(o);
case LeanString: return lean_string_data_byte_size(o);
default: return o->m_cs_sz;
}
}
}

static inline void lean_dealloc(lean_object * o, size_t sz) {
#ifdef LEAN_SMALL_ALLOCATOR
dealloc(o, sz);
Expand Down
6 changes: 3 additions & 3 deletions src/runtime/sharecommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ namespace lean {
extern "C" LEAN_EXPORT uint8 lean_sharecommon_eq(b_obj_arg o1, b_obj_arg o2) {
lean_assert(!lean_is_scalar(o1));
lean_assert(!lean_is_scalar(o2));
size_t sz1 = lean_object_byte_size(o1);
size_t sz2 = lean_object_byte_size(o2);
size_t sz1 = lean_object_data_byte_size(o1);
size_t sz2 = lean_object_data_byte_size(o2);
if (sz1 != sz2) return false;
// compare relevant parts of the header
if (lean_ptr_tag(o1) != lean_ptr_tag(o2)) return false;
Expand All @@ -27,7 +27,7 @@ extern "C" LEAN_EXPORT uint8 lean_sharecommon_eq(b_obj_arg o1, b_obj_arg o2) {

extern "C" LEAN_EXPORT uint64_t lean_sharecommon_hash(b_obj_arg o) {
lean_assert(!lean_is_scalar(o));
size_t sz = lean_object_byte_size(o);
size_t sz = lean_object_data_byte_size(o);
size_t header_sz = sizeof(lean_object);
// hash relevant parts of the header
unsigned init = hash(lean_ptr_tag(o), lean_ptr_other(o));
Expand Down

0 comments on commit 91c14c7

Please sign in to comment.