From 2d83be32927109073eb7dd0b6dc31da51f528a97 Mon Sep 17 00:00:00 2001 From: Elinor Fung Date: Wed, 14 Feb 2024 17:02:33 -0800 Subject: [PATCH 1/5] Add datastream as a native object library --- src/coreclr/CMakeLists.txt | 3 + .../dlls/mscoree/coreclr/CMakeLists.txt | 1 + src/coreclr/vm/CMakeLists.txt | 2 + src/coreclr/vm/ceemain.cpp | 5 + src/coreclr/vm/debug_stream.cpp | 20 + src/coreclr/vm/debug_stream.h | 14 + src/native/datastream/CMakeLists.txt | 8 + src/native/datastream/data_stream.c | 742 ++++++++++++++++++ src/native/datastream/data_stream.h | 161 ++++ 9 files changed, 956 insertions(+) create mode 100644 src/coreclr/vm/debug_stream.cpp create mode 100644 src/coreclr/vm/debug_stream.h create mode 100644 src/native/datastream/CMakeLists.txt create mode 100644 src/native/datastream/data_stream.c create mode 100644 src/native/datastream/data_stream.h diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 1c314d9bf624e..a9efb48a04e9f 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -135,6 +135,9 @@ if(CLR_CMAKE_TARGET_WIN32) add_subdirectory(gc/sample) endif() +# Data stream object library that will be linked in to the CLR +add_subdirectory(${CLR_SRC_NATIVE_DIR}/datastream ${CLR_ARTIFACTS_OBJ_DIR}/datastream/${CLR_CMAKE_TARGET_OS}.${CLR_CMAKE_TARGET_ARCH}.${CMAKE_BUILD_TYPE}) + #------------------------------------- # Include directory directives #------------------------------------- diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index 2e2a8bf87eccd..a0888a12cdcdb 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -109,6 +109,7 @@ set(CORECLR_LIBRARIES interop coreclrminipal gc_pal + datastream ) if(CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index ccd8bc35c8bf4..ab4d2e212f06a 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ 
b/src/coreclr/vm/CMakeLists.txt @@ -310,6 +310,7 @@ set(VM_SOURCES_WKS customattribute.cpp custommarshalerinfo.cpp autotrace.cpp + debug_stream.cpp dllimport.cpp dllimportcallback.cpp dynamicinterfacecastable.cpp @@ -410,6 +411,7 @@ set(VM_HEADERS_WKS customattribute.h custommarshalerinfo.h autotrace.h + debug_stream.h diagnosticserveradapter.h dllimport.h dllimportcallback.h diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index 0e546ffa12063..bb72029e01bac 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -163,6 +163,7 @@ #include "jithost.h" #include "pgo.h" #include "pendingload.h" +#include "debug_stream.h" #ifndef TARGET_UNIX #include "dwreport.h" @@ -822,6 +823,10 @@ void EEStartupHelper() InitializeDebugger(); // throws on error #endif // DEBUGGING_SUPPORTED + // Initialize the debug stream in the runtime. + if (!debug_stream::init()) + IfFailGo(E_FAIL); + #ifdef PROFILING_SUPPORTED // Initialize the profiling services. hr = ProfilingAPIUtility::InitializeProfiling(); diff --git a/src/coreclr/vm/debug_stream.cpp b/src/coreclr/vm/debug_stream.cpp new file mode 100644 index 0000000000000..46a8a61401537 --- /dev/null +++ b/src/coreclr/vm/debug_stream.cpp @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "debug_stream.h" +#include +#include + +namespace +{ + data_stream_context_t g_data_streams; +} + +bool debug_stream::init() +{ + size_t sizes[] = { 4096, 8192, 2048 }; + if (!dnds_init(&g_data_streams, ARRAY_SIZE(sizes), sizes)) + return false; + + return true; +} diff --git a/src/coreclr/vm/debug_stream.h b/src/coreclr/vm/debug_stream.h new file mode 100644 index 0000000000000..9a063fbd983da --- /dev/null +++ b/src/coreclr/vm/debug_stream.h @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#ifndef DEBUG_STREAM_H +#define DEBUG_STREAM_H + +#include + +namespace debug_stream +{ + bool init(); +} + +#endif // DEBUG_STREAM_H diff --git a/src/native/datastream/CMakeLists.txt b/src/native/datastream/CMakeLists.txt new file mode 100644 index 0000000000000..3f69810316012 --- /dev/null +++ b/src/native/datastream/CMakeLists.txt @@ -0,0 +1,8 @@ +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +set(DATASTREAM_SOURCES + data_stream.c + data_stream.h +) + +add_library(datastream OBJECT ${DATASTREAM_SOURCES}) diff --git a/src/native/datastream/data_stream.c b/src/native/datastream/data_stream.c new file mode 100644 index 0000000000000..c2ba36b3fdd84 --- /dev/null +++ b/src/native/datastream/data_stream.c @@ -0,0 +1,742 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include +#include +#include + +#include + +#ifdef HOST_WINDOWS +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include + +#define _Atomic(t) t +static void* atomic_load(_Atomic(void*)* ptr) +{ + void* p = *ptr; +#ifdef _M_ARM64 + MemoryBarrier(); +#endif // _M_ARM64 + return p; +} + +static bool atomic_compare_exchange_strong(_Atomic(void*)* obj, void** expected, void* desired) +{ + void* comperand = *expected; + *expected = InterlockedCompareExchangePointer(obj, desired, comperand); + return comperand == *expected; +} + +static bool atomic_compare_exchange_weak(_Atomic(void*)* obj, void** expected, void* desired) +{ + return atomic_compare_exchange_strong(obj, expected, desired); +} + +#define strdup(...) 
_strdup(__VA_ARGS__) + +#else +#include +#endif // !HOST_WINDOWS + +#ifndef EXTERN_C +#ifdef __cplusplus +#define EXTERN_C extern "C" +#else +#define EXTERN_C +#endif // __cplusplus +#endif // EXTERN_C + +static uint16_t const stream_version = 1; +static uint32_t const endian_magic_value = 0x646e6300; +static uint8_t const big_endian[sizeof(endian_magic_value)] = { 0x64, 0x6e, 0x63, 0 }; +static uint8_t const little_endian[sizeof(endian_magic_value)] = { 0, 0x63, 0x6e, 0x64 }; + +#ifdef __cplusplus +EXTERN_C { +#endif // __cplusplus + +typedef struct data_block__ +{ + void* begin; + _Atomic(void*) pos; + void* end; + _Atomic(struct data_block__*) prev; +} data_block_t; +static_assert(sizeof(data_block_t) % sizeof(void*) == 0, "Block data allocations assume pointer alignment"); + +typedef struct data_stream__ +{ + _Atomic(data_block_t*) curr; + size_t block_data_size; + size_t max_data_size; + data_stream_context_t* cxt; +} data_stream_t; + +typedef struct stream_entry__ +{ + uint32_t offset_next; + uint32_t reserved; + uint8_t data[]; +} stream_entry_t; + +#ifdef __cplusplus +} +#endif // __cplusplus + +EXTERN_C ds_validate_t dnds_validate(uint32_t m) +{ + if (memcmp(&m, big_endian, sizeof(big_endian)) == 0) + { + return dsv_big_endian; + } + else if (memcmp(&m, little_endian, sizeof(little_endian)) == 0) + { + return dsv_little_endian; + } + else + { + return dsv_invalid; + } +} + +EXTERN_C bool dnds_is_big_endian(data_stream_context_t* cxt) +{ + assert(cxt != NULL && dnds_validate(cxt->magic) != dsv_invalid); + return memcmp(&cxt->magic, big_endian, sizeof(big_endian)) == 0 + ? true + : false; +} + +// Used to perform a single allocation during initialization. 
+static uint8_t* initial_allocation( + size_t base_size, + size_t stream_count, + size_t const* stream_byte_lengths, + size_t* allocated_size) +{ + assert(base_size > 0); + assert(stream_count > 0); + assert(stream_byte_lengths != NULL); + assert(allocated_size != NULL); + + size_t total_streams = 0; + for (size_t i = 0; i < stream_count; ++i) + { + // Stream size must be large enough for an entry. + if (stream_byte_lengths[i] < sizeof(stream_entry_t)) + return NULL; + total_streams += stream_byte_lengths[i]; + } + + *allocated_size = base_size + total_streams; + return (uint8_t*)malloc(*allocated_size); +} + +EXTERN_C bool dnds_init( + data_stream_context_t* cxt, + uint32_t stream_count, + size_t const* stream_byte_lengths) +{ + if (cxt == NULL || stream_count == 0 || stream_byte_lengths == NULL) + { + return false; + } + + // Initialize basic details + cxt->magic = endian_magic_value; + cxt->size = sizeof(*cxt); + cxt->version = stream_version; + + // Perform a single allocation for initialization. + size_t total_allocation; + + // [TODO] Arithmetic overflow + size_t streams_collection_size = (size_t)stream_count * (sizeof(data_stream_t) + sizeof(data_block_t)); + uint8_t* mem = initial_allocation( + streams_collection_size, + stream_count, + stream_byte_lengths, + &total_allocation); + if (mem == NULL) + return false; + + uint8_t const * const end = mem + total_allocation; + cxt->streams_count = stream_count; + cxt->streams = (data_stream_t*)mem; + + // Move past the stream collection. + // The remaining memory is for the blocks and data. + mem += stream_count * sizeof(data_stream_t); + + // Zero initialize all remaining memory (every block header and its data).
+ memset(mem, 0, (size_t)(end - mem)); + + // Now initialize each stream + for (size_t i = 0; i < stream_count; ++i) + { + size_t len = stream_byte_lengths[i]; + assert(len != 0); + + // Initialize the blocks + data_block_t* blk = (data_block_t*)mem; + mem += sizeof(data_block_t); + assert(((intptr_t)mem) % sizeof(void*) == 0); + blk->begin = mem; + blk->pos = mem + len; + blk->end = blk->pos; + blk->prev = NULL; + + // Increment the memory counter. + mem += len; + + // Initialize the current stream with the new block + data_stream_t* curr = &cxt->streams[i]; + curr->curr = blk; + curr->block_data_size = len; + assert(len >= sizeof(stream_entry_t)); + curr->max_data_size = len - sizeof(stream_entry_t); + curr->cxt = cxt; + } + + assert(mem == end); + return true; +} + +EXTERN_C void dnds_destroy(data_stream_context_t* cxt) +{ + assert(dnds_validate(cxt->magic) != dsv_invalid); + if (cxt->streams != NULL) + { + for (uint32_t i = 0; i < cxt->streams_count; ++i) + { + // The streams themselves are all allocated + // in a chunk during initialization. However, + // any chained streams need to be deallocated + // before the larger stream data can be freed. + data_block_t* blk = cxt->streams[i].curr; + while (blk != NULL) + { + data_block_t* tmp = blk->prev; + + // If the current block is the last block (prev == NULL) + // in the chain, don't delete it since it was allocated + // with the stream itself.
+ if (tmp == NULL) + break; + + free(blk); + blk = tmp; + } + } + free(cxt->streams); + cxt->streams_count = 0; + cxt->streams = NULL; + } +} + +static stream_entry_t* alloc_entry(data_block_t* blk, size_t needed) +{ + assert(blk != NULL); + size_t total_needed = sizeof(stream_entry_t) + needed; + assert(needed < total_needed); + + size_t avail; + void* update; + void* expected = atomic_load(&blk->pos); + do + { + assert((intptr_t)expected >= (intptr_t)blk->begin); + avail = (intptr_t)expected - (intptr_t)blk->begin; + if (avail < total_needed) + return NULL; + + update = (void*)((intptr_t)expected - total_needed); + } + while (!atomic_compare_exchange_weak(&blk->pos, &expected, update)); + + // Initialize the entry + memset(update, 0, total_needed); + stream_entry_t* new_entry = (stream_entry_t*)update; + new_entry->offset_next = (uint32_t)total_needed; + return new_entry; +} + +static bool expand_stream(data_stream_t* s) +{ + assert(s != NULL); + size_t len = s->block_data_size; + + // Create new block + uint8_t* mem = (uint8_t*)malloc(sizeof(data_block_t) + len); + if (mem == NULL) + return false; + + // Get memory beyond the data_block_t. + data_block_t* blk = (data_block_t*)mem; + mem += sizeof(data_block_t); + memset(mem, 0, len); + + // Initialize block + assert(((intptr_t)mem) % sizeof(void*) == 0); + blk->begin = mem; + blk->pos = mem + len; + blk->end = blk->pos; + + // Set the current block as the new previous one. + data_block_t* expected = atomic_load(&s->curr); + blk->prev = expected; + + // Attempt to expand the stream. If it fails, we lost the race. 
+ if (!atomic_compare_exchange_strong(&s->curr, &expected, blk)) + free(blk); + + return true; +} + +static stream_entry_t* alloc_stream_entry(data_stream_t* s, size_t needed) +{ + stream_entry_t* e = alloc_entry(atomic_load(&s->curr), needed); + while (e == NULL) + { + if (needed > s->max_data_size || !expand_stream(s)) + return NULL; + e = alloc_entry(atomic_load(&s->curr), needed); + } + return e; +} + +EXTERN_C bool dnds_define_type( + data_stream_context_t* cxt, + type_details_t const* details, + size_t total_size, + size_t offsets_length, + field_offset_t const* offsets) +{ + if (cxt == NULL + || cxt->streams_count == 0 + || details == NULL) + { + return false; + } + assert(details->reserved == 0); + assert(dnds_validate(cxt->magic) != dsv_invalid); + + data_stream_t* str = &cxt->streams[0]; + + // [TODO] Arithmetic overflow + size_t offsets_bytes = offsets_length * sizeof(field_offset_t); + size_t needed = sizeof(type_details_t*) + sizeof(total_size) + offsets_bytes; + stream_entry_t* entry = alloc_stream_entry(str, needed); + if (entry == NULL) + return false; + + uint8_t* curr = entry->data; + memcpy(curr, &details, sizeof(type_details_t*)); + curr += sizeof(type_details_t*); + memcpy(curr, &total_size, sizeof(total_size)); + curr += sizeof(total_size); + memcpy(curr, offsets, offsets_bytes); + curr += offsets_bytes; + assert(entry->offset_next == (curr - (uint8_t*)entry)); + return true; +} + +EXTERN_C data_stream_t* dnds_get_stream( + data_stream_context_t* cxt, + size_t id) +{ + if (cxt == NULL || id >= cxt->streams_count || id == 0) + { + return NULL; + } + assert(dnds_validate(cxt->magic) != dsv_invalid); + + return &cxt->streams[id]; +} + +EXTERN_C bool dnds_record_instance( + data_stream_t* str, + uint16_t type, + void* inst) +{ + if (str == NULL) + return false; + + stream_entry_t* entry = alloc_stream_entry(str, sizeof(type) + sizeof(inst)); + if (entry == NULL) + return false; + + uint8_t* curr = entry->data; + memcpy(curr, &type, sizeof(type)); + curr += sizeof(type); +
memcpy(curr, &inst, sizeof(inst)); + curr += sizeof(inst); + assert(entry->offset_next == (curr - (uint8_t*)entry)); + return true; +} + +EXTERN_C bool dnds_record_blob( + data_stream_t* str, + uint16_t type, + uint16_t size, + void* inst) +{ + if (str == NULL) + return false; + + size_t needed_size = sizeof(type) + sizeof(size) + (size_t)size; + if (needed_size > str->max_data_size) + return false; + + stream_entry_t* entry = alloc_stream_entry(str, needed_size); + if (entry == NULL) + return false; + + uint8_t* curr = entry->data; + memcpy(curr, &type, sizeof(type)); + curr += sizeof(type); + memcpy(curr, &size, sizeof(size)); + curr += sizeof(size); + memcpy(curr, inst, size); + curr += size; + assert(entry->offset_next == (curr - (uint8_t*)entry)); + return true; +} + +static bool read_local(memory_reader_t* r, intptr_t m, size_t* i, void** ptr) +{ + *ptr = (void*)m; + return true; +} + +static void free_local(memory_reader_t* r, size_t len, void* ptr) +{ + // nop +} + +static memory_reader_t g_memory_reader_local = { read_local, free_local }; + +static bool read_in_block_data(memory_reader_t* reader, data_block_t* block, size_t* data_read, void** data, data_block_t** prev_block) +{ + bool result = false; + size_t remote_block_read = sizeof(data_block_t); + data_block_t* remote_block = NULL; + if (!reader->read_ptr(reader, (intptr_t)block, &remote_block_read, (void**)&remote_block)) + goto cleanup; + + *data_read = (intptr_t)remote_block->end - (intptr_t)remote_block->pos; + if (!reader->read_ptr(reader, (intptr_t)remote_block->pos, data_read, data)) + goto cleanup; + + *prev_block = remote_block->prev; + result = true; +cleanup: + if (remote_block) reader->free_ptr(reader, remote_block_read, remote_block); + return result; +} + +static bool read_in_details(memory_reader_t* reader, type_details_t* details, type_details_t* local_details) +{ + assert(reader != NULL); + assert(details != NULL); + assert(local_details != NULL); + + char* name = NULL; + size_t 
name_read = 0; + + bool result = false; + type_details_t* remote_details = NULL; + size_t remote_details_read = sizeof(*remote_details); + if (!reader->read_ptr(reader, (intptr_t)details, &remote_details_read, (void**)&remote_details)) + goto cleanup; + + name_read = remote_details->name_len; + if (!reader->read_ptr(reader, (intptr_t)remote_details->name, &name_read, (void**)&name)) + goto cleanup; + + *local_details = *remote_details; + local_details->name = strdup(name); + if (local_details->name == NULL) + goto cleanup; + + result = true; +cleanup: + if (remote_details) reader->free_ptr(reader, remote_details_read, remote_details); + if (name) reader->free_ptr(reader, name_read, name); + return result; +} + +static bool enum_type( + on_next_type on_next, + void* user_defined, + memory_reader_t* reader, + stream_entry_t* curr, + stream_entry_t* end) +{ + assert(on_next != NULL); + assert(reader != NULL); + assert(curr <= end); + + // Iterate over entries + intptr_t mem_pos; + stream_entry_t* next; + while (curr < end) + { + // Compute the next entry + assert(curr->offset_next != 0); + next = (stream_entry_t*)(((uint8_t*)curr) + curr->offset_next); + + // We compute the memory position based on the start of data. 
+ mem_pos = (intptr_t)curr->data; + + type_details_t* details; + memcpy(&details, (void*)mem_pos, sizeof(type_details_t*)); + mem_pos += sizeof(type_details_t*); + + type_details_t local_details; + if (!read_in_details(reader, details, &local_details)) + return false; + + size_t total_size; + memcpy(&total_size, (void*)mem_pos, sizeof(total_size)); + mem_pos += sizeof(total_size); + + size_t offsets_length = ((intptr_t)next - mem_pos) / sizeof(field_offset_t); + bool cont = on_next(&local_details, total_size, offsets_length, (field_offset_t*)mem_pos, user_defined); + free((void*)local_details.name); + if (!cont) + break; + + curr = next; + } + return true; +} + +EXTERN_C bool dnds_enum_type( + data_stream_context_t* cxt, + on_next_type on_next, + void* user_defined, + memory_reader_t* reader) +{ + if (cxt == NULL + || cxt->streams_count == 0 + || on_next == NULL) + { + return false; + } + assert(dnds_validate(cxt->magic) != dsv_invalid); + + if (reader == NULL) + reader = &g_memory_reader_local; + + bool result = false; + + size_t data_read = 0; + void* data = NULL; + + // Read in the streams collection + size_t streams_read = (size_t)cxt->streams_count * sizeof(data_stream_t); + data_stream_t* streams = NULL; + if (!reader->read_ptr(reader, (intptr_t)cxt->streams, &streams_read, (void**)&streams)) + goto cleanup; + + // The first stream is the types' stream. 
+ data_stream_t* types_stream = &streams[0]; + + // [TODO] Read all blocks in the stream + data_block_t* types_block = types_stream->curr; + if (!read_in_block_data(reader, types_block, &data_read, &data, &types_block)) + goto cleanup; + + stream_entry_t* entry = (stream_entry_t*)data; + stream_entry_t* end = (stream_entry_t*)((intptr_t)data + data_read); + if (!enum_type(on_next, user_defined, reader, entry, end)) + goto cleanup; + + result = true; +cleanup: + if (streams) reader->free_ptr(reader, streams_read, streams); + if (data) reader->free_ptr(reader, data_read, data); + return result; +} + +static bool enum_blobs( + on_next_blob on_next, + void* user_defined, + memory_reader_t* reader, + stream_entry_t* curr, + stream_entry_t* end) +{ + assert(on_next != NULL); + assert(reader != NULL); + assert(curr <= end); + + // Iterate over entries + intptr_t mem_pos; + stream_entry_t* next; + while (curr < end) + { + // Compute the next entry + assert(curr->offset_next != 0); + next = (stream_entry_t*)(((uint8_t*)curr) + curr->offset_next); + mem_pos = (intptr_t)curr->data; + + uint16_t type; + memcpy(&type, (void*)mem_pos, sizeof(type)); + mem_pos += sizeof(type); + + uint16_t size; + memcpy(&size, (void*)mem_pos, sizeof(size)); + mem_pos += sizeof(size); + + void* inst = (void*)mem_pos; + mem_pos += size; + + bool cont = on_next(type, (uint16_t)size, inst, user_defined); + if (!cont) + break; + + curr = next; + } + return true; +} + +EXTERN_C bool dnds_enum_blobs( + data_stream_context_t* cxt, + on_next_blob on_next, + void* user_defined, + memory_reader_t* reader) +{ + if (cxt == NULL + || cxt->streams_count <= 1 + || on_next == NULL) + { + return false; + } + assert(dnds_validate(cxt->magic) != dsv_invalid); + + if (reader == NULL) + reader = &g_memory_reader_local; + + bool result = false; + + size_t data_read = 0; + void* data = NULL; + + // Read in the streams collection + size_t streams_read = (size_t)cxt->streams_count * sizeof(data_stream_t); + 
data_stream_t* streams = NULL; + if (!reader->read_ptr(reader, (intptr_t)cxt->streams, &streams_read, (void**)&streams)) + goto cleanup; + + // [TODO] Enumerate all streams + data_stream_t* str = &streams[1]; + + // Read the stream's blocks + // [TODO] Read all blocks in the stream + data_block_t* block = str->curr; + if (!read_in_block_data(reader, block, &data_read, &data, &block)) + goto cleanup; + + stream_entry_t* entry = (stream_entry_t*)data; + stream_entry_t* end = (stream_entry_t*)((intptr_t)data + data_read); + if (!enum_blobs(on_next, user_defined, reader, entry, end)) + goto cleanup; + + result = true; +cleanup: + if (streams) reader->free_ptr(reader, streams_read, streams); + if (data) reader->free_ptr(reader, data_read, data); + return result; +} + +static bool enum_instances( + on_next_instance on_next, + void* user_defined, + memory_reader_t* reader, + stream_entry_t* curr, + stream_entry_t* end) +{ + assert(on_next != NULL); + assert(reader != NULL); + assert(curr <= end); + + // Iterate over entries + intptr_t mem_pos; + stream_entry_t* next; + while (curr < end) + { + // Compute the next entry + assert(curr->offset_next != 0); + next = (stream_entry_t*)(((uint8_t*)curr) + curr->offset_next); + mem_pos = (intptr_t)curr->data; + + uint16_t type; + memcpy(&type, (void*)mem_pos, sizeof(type)); + mem_pos += sizeof(type); + + intptr_t inst; + memcpy(&inst, (void*)mem_pos, sizeof(inst)); + mem_pos += sizeof(inst); + + bool cont = on_next(type, inst, user_defined); + if (!cont) + break; + + curr = next; + } + return true; +} + +EXTERN_C bool dnds_enum_instances( + data_stream_context_t* cxt, + on_next_instance on_next, + void* user_defined, + memory_reader_t* reader) +{ + if (cxt == NULL + || cxt->streams_count <= 2 + || on_next == NULL) + { + return false; + } + assert(dnds_validate(cxt->magic) != dsv_invalid); + + if (reader == NULL) + reader = &g_memory_reader_local; + + bool result = false; + + size_t data_read = 0; + void* data = NULL; + + // Read 
in the streams collection + size_t streams_read = (size_t)cxt->streams_count * sizeof(data_stream_t); + data_stream_t* streams = NULL; + if (!reader->read_ptr(reader, (intptr_t)cxt->streams, &streams_read, (void**)&streams)) + goto cleanup; + + // [TODO] Enumerate all streams + data_stream_t* str = &streams[2]; + + // Read the stream's blocks + // [TODO] Read all blocks in the stream + data_block_t* block = str->curr; + if (!read_in_block_data(reader, block, &data_read, &data, &block)) + goto cleanup; + + stream_entry_t* entry = (stream_entry_t*)data; + stream_entry_t* end = (stream_entry_t*)((intptr_t)data + data_read); + if (!enum_instances(on_next, user_defined, reader, entry, end)) + goto cleanup; + + result = true; +cleanup: + if (streams) reader->free_ptr(reader, streams_read, streams); + if (data) reader->free_ptr(reader, data_read, data); + return result; +} diff --git a/src/native/datastream/data_stream.h b/src/native/datastream/data_stream.h new file mode 100644 index 0000000000000..3b1fb7d48bf5e --- /dev/null +++ b/src/native/datastream/data_stream.h @@ -0,0 +1,161 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef DN_DATA_STREAM_H +#define DN_DATA_STREAM_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif // __cplusplus + +// Forward declarations +typedef struct data_stream__ data_stream_t; + +typedef struct data_stream_context__ +{ + uint32_t magic; // Value used to confirm the pointer and check endianness of the target machine. + uint16_t size; // The current size of this data structure. + uint16_t version; // The version of the data stream binary format. + + uint32_t reserved; // Reserved. + uint32_t streams_count; // Count of statically allocated streams. + data_stream_t* streams; // Statically allocated streams. +} data_stream_context_t; + +// Validation results for a data stream. 
+typedef enum +{ + dsv_invalid, + dsv_little_endian, + dsv_big_endian, +} ds_validate_t; + +// Validate the magic number is valid +ds_validate_t dnds_validate(uint32_t magic); + +// Determine if the data stream is big or little endian. +bool dnds_is_big_endian(data_stream_context_t*); + +// +// Target APIs +// + +// Initialize the data stream. +// Statically allocates streams with a default block size. +bool dnds_init( + data_stream_context_t*, + uint32_t stream_count, + size_t const* stream_byte_lengths); + +// Deallocate all memory associated with the data stream. +void dnds_destroy(data_stream_context_t*); + +// Define a maximum value for the type datatype. +#define MAX_TYPE_SIZE UINT16_MAX + +typedef struct type_details__ +{ + uint16_t type; + uint16_t version; + uint16_t reserved; // Must be zero + uint16_t name_len; // Includes null + char const* name; +} type_details_t; +static_assert((offsetof(type_details_t, name) % sizeof(void*)) == 0, "Pointer field should be pointer aligned"); + +typedef struct field_offset__ +{ + uint16_t offset; + uint16_t type; +} field_offset_t; +static_assert(sizeof(field_offset_t) == sizeof(uint32_t), "Field offset structures should be 4 bytes"); + +// Define a type in the data stream +bool dnds_define_type( + data_stream_context_t*, + type_details_t const* details, + size_t total_size, + size_t offsets_length, + field_offset_t const* offsets); + +// Get an available stream to write to +data_stream_t* dnds_get_stream( + data_stream_context_t*, + size_t id); + +// Record an instance in the stream +bool dnds_record_instance( + data_stream_t*, + uint16_t type, + void* inst); + +// Record a data blob in the stream +bool dnds_record_blob( + data_stream_t*, + uint16_t type, + uint16_t size, + void* inst); + +// +// Reader APIs +// + +typedef struct memory_reader__ +{ + bool(*read_ptr)(struct memory_reader__*,intptr_t,size_t*,void**); + void(*free_ptr)(struct memory_reader__*,size_t,void*); +} memory_reader_t; + +// Return false to stop 
enumeration +typedef bool(*on_next_type)( + type_details_t const* details, + size_t total_size, + size_t offsets_length, + field_offset_t const* offsets, + void* user_defined); + +// Enumerate the types in the data stream +bool dnds_enum_type( + data_stream_context_t*, + on_next_type on_next, + void* user_defined, + memory_reader_t* reader); + +// Return false to stop enumeration +typedef bool(*on_next_blob)( + uint16_t type, + uint16_t data_size_bytes, + void* data, + void* user_defined); + +// Enumerate all blobs in the data stream +bool dnds_enum_blobs( + data_stream_context_t*, + on_next_blob on_next, + void* user_defined, + memory_reader_t* reader); + +// Return false to stop enumeration +typedef bool(*on_next_instance)( + uint16_t type, + intptr_t instance, + void* user_defined); + +// Enumerate all instances in the data stream +bool dnds_enum_instances( + data_stream_context_t*, + on_next_instance on_next, + void* user_defined, + memory_reader_t* reader); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // DN_DATA_STREAM_H From b21c00d3a38ee3dd950e20c772bc02efa383b595 Mon Sep 17 00:00:00 2001 From: Elinor Fung Date: Thu, 7 Mar 2024 14:46:47 -0800 Subject: [PATCH 2/5] Export dnds_* APIs --- src/coreclr/components.cmake | 1 + src/native/datastream/CMakeLists.txt | 11 ++++++++++ src/native/datastream/data_stream.c | 20 +++++++++---------- src/native/datastream/data_stream.h | 30 ++++++++++++++++++---------- 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/src/coreclr/components.cmake b/src/coreclr/components.cmake index 70dd081376f67..617dbd5e1e98d 100644 --- a/src/coreclr/components.cmake +++ b/src/coreclr/components.cmake @@ -8,6 +8,7 @@ add_component(iltools) add_component(nativeaot) add_component(spmi) add_component(debug) +add_component(cdac) # Define coreclr_all as the fallback component and make every component depend on this component. 
# iltools and paltests should be minimal subsets, so don't add a dependency on coreclr_misc diff --git a/src/native/datastream/CMakeLists.txt b/src/native/datastream/CMakeLists.txt index 3f69810316012..b75dae0e84a25 100644 --- a/src/native/datastream/CMakeLists.txt +++ b/src/native/datastream/CMakeLists.txt @@ -6,3 +6,14 @@ set(DATASTREAM_SOURCES ) add_library(datastream OBJECT ${DATASTREAM_SOURCES}) + +add_library_clr(datastreamlib SHARED ${DATASTREAM_SOURCES}) +target_compile_options(datastreamlib PRIVATE -DBUILD_SHARED_LIBRARY) + +if(CLR_CMAKE_TARGET_WIN32) + install(FILES $<TARGET_OBJECTS:datastream> DESTINATION datastream COMPONENT cdac RENAME datastream.obj) +else() + install(FILES $<TARGET_OBJECTS:datastream> DESTINATION datastream COMPONENT cdac RENAME datastream.o) +endif() + +install_clr(TARGETS datastreamlib DESTINATIONS . COMPONENT cdac) diff --git a/src/native/datastream/data_stream.c b/src/native/datastream/data_stream.c index c2ba36b3fdd84..992c048cf6644 100644 --- a/src/native/datastream/data_stream.c +++ b/src/native/datastream/data_stream.c @@ -103,7 +103,7 @@ EXTERN_C ds_validate_t dnds_validate(uint32_t m) } } -EXTERN_C bool dnds_is_big_endian(data_stream_context_t* cxt) +EXTERN_C DATA_STREAM_EXPORT bool dnds_is_big_endian(data_stream_context_t* cxt) { assert(cxt != NULL && dnds_validate(cxt->magic) != dsv_invalid); return memcmp(&cxt->magic, big_endian, sizeof(big_endian)) == 0 @@ -136,7 +136,7 @@ static uint8_t* initial_allocation( return (uint8_t*)malloc(*allocated_size); } -EXTERN_C bool dnds_init( +EXTERN_C DATA_STREAM_EXPORT bool dnds_init( data_stream_context_t* cxt, uint32_t stream_count, size_t const* stream_byte_lengths) @@ -206,7 +206,7 @@ EXTERN_C bool dnds_init( return true; } -EXTERN_C void dnds_destroy(data_stream_context_t* cxt) +EXTERN_C DATA_STREAM_EXPORT void dnds_destroy(data_stream_context_t* cxt) { assert(dnds_validate(cxt->magic) != dsv_invalid); if (cxt->streams != NULL) @@ -309,7 +309,7 @@ static stream_entry_t* alloc_stream_entry(data_stream_t* s, size_t needed) return e;
} -EXTERN_C bool dnds_define_type( +EXTERN_C DATA_STREAM_EXPORT bool dnds_define_type( data_stream_context_t* cxt, type_details_t const* details, size_t total_size, @@ -345,7 +345,7 @@ EXTERN_C bool dnds_define_type( return true; } -EXTERN_C data_stream_t* dnds_get_stream( +EXTERN_C DATA_STREAM_EXPORT data_stream_t* dnds_get_stream( data_stream_context_t* cxt, size_t id) { @@ -358,7 +358,7 @@ EXTERN_C data_stream_t* dnds_get_stream( return &cxt->streams[id]; } -EXTERN_C bool dnds_record_instance( +EXTERN_C DATA_STREAM_EXPORT bool dnds_record_instance( data_stream_t* str, uint16_t type, void* inst) @@ -379,7 +379,7 @@ EXTERN_C bool dnds_record_instance( return true; } -EXTERN_C bool dnds_record_blob( +EXTERN_C DATA_STREAM_EXPORT bool dnds_record_blob( data_stream_t* str, uint16_t type, uint16_t size, @@ -516,7 +516,7 @@ static bool enum_type( return true; } -EXTERN_C bool dnds_enum_type( +EXTERN_C DATA_STREAM_EXPORT bool dnds_enum_type( data_stream_context_t* cxt, on_next_type on_next, void* user_defined, @@ -605,7 +605,7 @@ static bool enum_blobs( return true; } -EXTERN_C bool dnds_enum_blobs( +EXTERN_C DATA_STREAM_EXPORT bool dnds_enum_blobs( data_stream_context_t* cxt, on_next_blob on_next, void* user_defined, @@ -692,7 +692,7 @@ static bool enum_instances( return true; } -EXTERN_C bool dnds_enum_instances( +EXTERN_C DATA_STREAM_EXPORT bool dnds_enum_instances( data_stream_context_t* cxt, on_next_instance on_next, void* user_defined, diff --git a/src/native/datastream/data_stream.h b/src/native/datastream/data_stream.h index 3b1fb7d48bf5e..54fa2021fa9b3 100644 --- a/src/native/datastream/data_stream.h +++ b/src/native/datastream/data_stream.h @@ -9,6 +9,16 @@ #include #include +#ifdef BUILD_SHARED_LIBRARY +#ifdef _MSC_VER +#define DATA_STREAM_EXPORT __declspec(dllexport) +#else +#define DATA_STREAM_EXPORT __attribute__ ((visibility ("default"))) +#endif // _MSC_VER +#else +#define DATA_STREAM_EXPORT +#endif // BUILD_SHARED_LIBRARY + #ifdef __cplusplus extern "C" 
{ @@ -40,7 +50,7 @@ typedef enum ds_validate_t dnds_validate(uint32_t magic); // Determine if the data stream is big or little endian. -bool dnds_is_big_endian(data_stream_context_t*); +DATA_STREAM_EXPORT bool dnds_is_big_endian(data_stream_context_t*); // // Target APIs @@ -48,13 +58,13 @@ bool dnds_is_big_endian(data_stream_context_t*); // Initialize the data stream. // Statically allocates streams with a default block size. -bool dnds_init( +DATA_STREAM_EXPORT bool dnds_init( data_stream_context_t*, uint32_t stream_count, size_t const* stream_byte_lengths); // Deallocate all memory associated with the data stream. -void dnds_destroy(data_stream_context_t*); +DATA_STREAM_EXPORT void dnds_destroy(data_stream_context_t*); // Define a maximum value for the type datatype. #define MAX_TYPE_SIZE UINT16_MAX @@ -77,7 +87,7 @@ typedef struct field_offset__ static_assert(sizeof(field_offset_t) == sizeof(uint32_t), "Field offset structures should be 4 bytes"); // Define a type in the data stream -bool dnds_define_type( +DATA_STREAM_EXPORT bool dnds_define_type( data_stream_context_t*, type_details_t const* details, size_t total_size, @@ -85,18 +95,18 @@ bool dnds_define_type( field_offset_t const* offsets); // Get an available stream to write to -data_stream_t* dnds_get_stream( +DATA_STREAM_EXPORT data_stream_t* dnds_get_stream( data_stream_context_t*, size_t id); // Record an instance in the stream -bool dnds_record_instance( +DATA_STREAM_EXPORT bool dnds_record_instance( data_stream_t*, uint16_t type, void* inst); // Record a data blob in the stream -bool dnds_record_blob( +DATA_STREAM_EXPORT bool dnds_record_blob( data_stream_t*, uint16_t type, uint16_t size, @@ -121,7 +131,7 @@ typedef bool(*on_next_type)( void* user_defined); // Enumerate the types in the data stream -bool dnds_enum_type( +DATA_STREAM_EXPORT bool dnds_enum_type( data_stream_context_t*, on_next_type on_next, void* user_defined, @@ -135,7 +145,7 @@ typedef bool(*on_next_blob)( void* user_defined); // 
Enumerate all blobs in the data stream -bool dnds_enum_blobs( +DATA_STREAM_EXPORT bool dnds_enum_blobs( data_stream_context_t*, on_next_blob on_next, void* user_defined, @@ -148,7 +158,7 @@ typedef bool(*on_next_instance)( void* user_defined); // Enumerate all instances in the data stream -bool dnds_enum_instances( +DATA_STREAM_EXPORT bool dnds_enum_instances( data_stream_context_t*, on_next_instance on_next, void* user_defined, From ea0dc146fdfde04750b96826a1b8cbd3de336bff Mon Sep 17 00:00:00 2001 From: Elinor Fung Date: Thu, 7 Mar 2024 15:08:13 -0800 Subject: [PATCH 3/5] Switch data_stream exports to stop using bool --- src/native/datastream/data_stream.c | 18 ++++++++-------- src/native/datastream/data_stream.h | 33 ++++++++++++++++------------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/native/datastream/data_stream.c b/src/native/datastream/data_stream.c index 992c048cf6644..8cce39a3448f5 100644 --- a/src/native/datastream/data_stream.c +++ b/src/native/datastream/data_stream.c @@ -103,7 +103,7 @@ EXTERN_C ds_validate_t dnds_validate(uint32_t m) } } -EXTERN_C DATA_STREAM_EXPORT bool dnds_is_big_endian(data_stream_context_t* cxt) +EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_is_big_endian(data_stream_context_t* cxt) { assert(cxt != NULL && dnds_validate(cxt->magic) != dsv_invalid); return memcmp(&cxt->magic, big_endian, sizeof(big_endian)) == 0 @@ -136,7 +136,7 @@ static uint8_t* initial_allocation( return (uint8_t*)malloc(*allocated_size); } -EXTERN_C DATA_STREAM_EXPORT bool dnds_init( +EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_init( data_stream_context_t* cxt, uint32_t stream_count, size_t const* stream_byte_lengths) @@ -309,7 +309,7 @@ static stream_entry_t* alloc_stream_entry(data_stream_t* s, size_t needed) return e; } -EXTERN_C DATA_STREAM_EXPORT bool dnds_define_type( +EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_define_type( data_stream_context_t* cxt, type_details_t const* details, size_t total_size, @@ -358,7 +358,7 @@ EXTERN_C 
DATA_STREAM_EXPORT data_stream_t* dnds_get_stream( return &cxt->streams[id]; } -EXTERN_C DATA_STREAM_EXPORT bool dnds_record_instance( +EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_record_instance( data_stream_t* str, uint16_t type, void* inst) @@ -379,7 +379,7 @@ EXTERN_C DATA_STREAM_EXPORT bool dnds_record_instance( return true; } -EXTERN_C DATA_STREAM_EXPORT bool dnds_record_blob( +EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_record_blob( data_stream_t* str, uint16_t type, uint16_t size, @@ -407,7 +407,7 @@ EXTERN_C DATA_STREAM_EXPORT bool dnds_record_blob( return true; } -static bool read_local(memory_reader_t* r, intptr_t m, size_t* i, void** ptr) +static ds_bool read_local(memory_reader_t* r, intptr_t m, size_t* i, void** ptr) { *ptr = (void*)m; return true; @@ -516,7 +516,7 @@ static bool enum_type( return true; } -EXTERN_C DATA_STREAM_EXPORT bool dnds_enum_type( +EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_enum_type( data_stream_context_t* cxt, on_next_type on_next, void* user_defined, @@ -605,7 +605,7 @@ static bool enum_blobs( return true; } -EXTERN_C DATA_STREAM_EXPORT bool dnds_enum_blobs( +EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_enum_blobs( data_stream_context_t* cxt, on_next_blob on_next, void* user_defined, @@ -692,7 +692,7 @@ static bool enum_instances( return true; } -EXTERN_C DATA_STREAM_EXPORT bool dnds_enum_instances( +EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_enum_instances( data_stream_context_t* cxt, on_next_instance on_next, void* user_defined, diff --git a/src/native/datastream/data_stream.h b/src/native/datastream/data_stream.h index 54fa2021fa9b3..fcb68c57c6ab3 100644 --- a/src/native/datastream/data_stream.h +++ b/src/native/datastream/data_stream.h @@ -6,7 +6,6 @@ #include #include -#include #include #ifdef BUILD_SHARED_LIBRARY @@ -39,18 +38,22 @@ typedef struct data_stream_context__ } data_stream_context_t; // Validation results for a data stream. 
-typedef enum +enum { dsv_invalid, dsv_little_endian, dsv_big_endian, -} ds_validate_t; +}; +typedef int32_t ds_validate_t; // Validate the magic number is valid ds_validate_t dnds_validate(uint32_t magic); +// Boolean return type for data stream functions. +typedef unsigned char ds_bool; + // Determine if the data stream is big or little endian. -DATA_STREAM_EXPORT bool dnds_is_big_endian(data_stream_context_t*); +DATA_STREAM_EXPORT ds_bool dnds_is_big_endian(data_stream_context_t*); // // Target APIs @@ -58,7 +61,7 @@ DATA_STREAM_EXPORT bool dnds_is_big_endian(data_stream_context_t*); // Initialize the data stream. // Statically allocates streams with a default block size. -DATA_STREAM_EXPORT bool dnds_init( +DATA_STREAM_EXPORT ds_bool dnds_init( data_stream_context_t*, uint32_t stream_count, size_t const* stream_byte_lengths); @@ -87,7 +90,7 @@ typedef struct field_offset__ static_assert(sizeof(field_offset_t) == sizeof(uint32_t), "Field offset structures should be 4 bytes"); // Define a type in the data stream -DATA_STREAM_EXPORT bool dnds_define_type( +DATA_STREAM_EXPORT ds_bool dnds_define_type( data_stream_context_t*, type_details_t const* details, size_t total_size, @@ -100,13 +103,13 @@ DATA_STREAM_EXPORT data_stream_t* dnds_get_stream( size_t id); // Record an instance in the stream -DATA_STREAM_EXPORT bool dnds_record_instance( +DATA_STREAM_EXPORT ds_bool dnds_record_instance( data_stream_t*, uint16_t type, void* inst); // Record a data blob in the stream -DATA_STREAM_EXPORT bool dnds_record_blob( +DATA_STREAM_EXPORT ds_bool dnds_record_blob( data_stream_t*, uint16_t type, uint16_t size, @@ -118,12 +121,12 @@ DATA_STREAM_EXPORT bool dnds_record_blob( typedef struct memory_reader__ { - bool(*read_ptr)(struct memory_reader__*,intptr_t,size_t*,void**); + ds_bool(*read_ptr)(struct memory_reader__*,intptr_t,size_t*,void**); void(*free_ptr)(struct memory_reader__*,size_t,void*); } memory_reader_t; // Return false to stop enumeration -typedef 
bool(*on_next_type)( +typedef ds_bool(*on_next_type)( type_details_t const* details, size_t total_size, size_t offsets_length, @@ -131,34 +134,34 @@ typedef bool(*on_next_type)( void* user_defined); // Enumerate the types in the data stream -DATA_STREAM_EXPORT bool dnds_enum_type( +DATA_STREAM_EXPORT ds_bool dnds_enum_type( data_stream_context_t*, on_next_type on_next, void* user_defined, memory_reader_t* reader); // Return false to stop enumeration -typedef bool(*on_next_blob)( +typedef ds_bool(*on_next_blob)( uint16_t type, uint16_t data_size_bytes, void* data, void* user_defined); // Enumerate all blobs in the data stream -DATA_STREAM_EXPORT bool dnds_enum_blobs( +DATA_STREAM_EXPORT ds_bool dnds_enum_blobs( data_stream_context_t*, on_next_blob on_next, void* user_defined, memory_reader_t* reader); // Return false to stop enumeration -typedef bool(*on_next_instance)( +typedef ds_bool(*on_next_instance)( uint16_t type, intptr_t instance, void* user_defined); // Enumerate all instances in the data stream -DATA_STREAM_EXPORT bool dnds_enum_instances( +DATA_STREAM_EXPORT ds_bool dnds_enum_instances( data_stream_context_t*, on_next_instance on_next, void* user_defined, From 9c8e3481fb0c71f4c41d519975bee9b328e585e2 Mon Sep 17 00:00:00 2001 From: Elinor Fung Date: Thu, 7 Mar 2024 20:58:28 -0800 Subject: [PATCH 4/5] Add doc --- docs/design/features/data-stream.md | 106 ++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 docs/design/features/data-stream.md diff --git a/docs/design/features/data-stream.md b/docs/design/features/data-stream.md new file mode 100644 index 0000000000000..9ccb9f52dacdf --- /dev/null +++ b/docs/design/features/data-stream.md @@ -0,0 +1,106 @@ +## .NET runtime data stream + +The .NET runtime data stream is a mechanism for the runtime to encode information about itself in a way that is accessible to diagnostic tools.
This enables de-coupling of the tooling (for example, the DAC) from the details of a specific version of the runtime. + +Data Streams consist of four concepts. + +1. A collection of type descriptions. + +2. A collection of instances (i.e., pointers) of types described in (1). + +3. A collection of value blobs. + +4. A versioning scheme that permits evolution of (1), (2), and (3). + +The data streams model begins with a header that captures the minimum needed data—`data_stream_context_t`. The header contains a mechanism, the `magic` field, that confirms the memory is what we expect _and_ also indicates the endianness of the target process. The endianness of the target is important for scenarios where the reader is running on another machine. The header also contains the data stream version and the statically allocated stream count. + +### Streams and Blocks (ver.1) + +The stream, `data_stream_t`, is an opaque data structure to the user and is an implementation detail. The only indication of changes to it is captured in the version value contained on the header. The current design will be described and is considered version "1". + +A stream is a singly linked list of uniform sized blocks that are allocated on demand. The stream itself is a small type used to hold the head of the list of blocks, the `curr` pointer, and a pointer to the data stream header, `cxt`. + +The core of the stream is the block, `data_block_t`. This data structure is a contiguous allocation with (4) pointer slots—`begin`, `end`, `pos` and `prev`. The `begin`, `end` and `prev` pointers are set on allocation and never change. The internal block allocation scheme is commonly called a "bump allocator". The `pos` pointer represents the current spot in the range between `begin` and `end`. Blocks are filled in reverse order (`end` to `begin`) to ensure reading of a stream is always performed in reverse chronological order.
+ +The `pos` value on the `data_block_t` and `curr` on the `data_stream_t` are both updated atomically and are expected to be lock-free. + +Within each block an entry data structure, `stream_entry_t`, is used to quickly and safely add new entries. An entry consists of a field, `offset_next`, to hold the relative offset from the current entry to the next. This offset concept makes reading easy since once the entire block is read from the target no further memory reads are needed to walk the block. + +The simplicity of the streams and blocks makes reading from another process simple. + +#### Types (ver.1) + +The collection of types is recorded in the first stream in the `data_stream_context_t` type's `streams` field. + +Types are expressed with minimal data to efficiently version and read from a target process. Type definitions start with an identifying tuple—`type` (numeric ID), `version` and `name`. The tuple's design facilitates creation of a map look-up on the reader side and a way to evolve the definition safely on the target side. + +The layout of a type is expressed by the size, in bytes, and a collection of relevant field offsets and their type, `field_offset_t`. Field offset count is computed by reading in two pointer sized values and then dividing the remaining space by the size of the `field_offset_t` data structure. Both of these components are needed to satisfy the evolution and reading efficiency goals. The size allows the reader to read an entire type in one operation and the field offsets need not be exhaustive if they provide no utility on the reader side. + +An example of the current memory layout of a type entry is below. + +``` +| type_details_t* | # Pointer in target process +| size_t | # Total size, in bytes, of the type +| field_offset_t 1 | # First field offset +| ...
| +| field_offset_t N | # Last field offset +``` + +#### Instances (ver.1) + +All streams, other than the first stream in the `data_stream_context_t` type's `streams` field, which is used for types, contain instances. + +Instances are defined as a numeric ID and a valid pointer in the target process. The numeric ID is expected to exist in one of the type identifier tuples defined above. + +An example of the current memory layout of an instance entry is below. + +``` +| uint16_t | # Type numeric ID +| intptr_t | # Target process memory +``` + +### Target usage + +Consumption of data streams should start with a mechanism for defining the type identity tuple that can be shared between the target and reader. + +A `data_stream_context_t` instance should be allocated, statically or dynamically, in a manner where its address is discoverable by the reader process. The `data_stream_context_t` instance must be initialized, with its static stream count and block sizes defined. There must be at least a single stream size for use in the type definitions. It is expected that the type's stream has a block size that is sufficient to hold all type definitions without an additional allocation. + +Type versions or names are not used directly by the target process. The target process records these values as an indication for the reader only. + +**NOTE** Registration of types should be done prior to any recording of instances. It is assumed that all sharable types are known statically. + +After type registration is performed, streams can be acquired by components in the target process and typed instances inserted into the stream. Adding an instance to a stream is considered thread safe. The typing of an instance should be done via a numeric ID. + +### Reader usage + +The reader should first define a series of type names that it is able to interpret and consume. These type names should match the names defined by the target.
These names could be used to map types in the target process with their numeric ID and version. The reader is not expected to have any hardcoded numeric type IDs as these are subject to change between target versions. + +The reader is expected to be resilient when receiving an unknown version of a type and gracefully interpret it. Two examples of graceful interpretation are describing it as `"Unknown"` or printing its memory address in the target process. + +After acquiring the target process's `data_stream_context_t` address, it should be validated and endianness computed. + +The first time the data stream is read, the target process's types are enumerated in reverse chronological order (Last In/First Out) and this data may be used throughout the lifetime of the target process. During the enumeration of types the following can be done: + +* Creation of a fast mapping from name to numeric ID. + +* Creation of a look-up map to type details (e.g., field offsets). + +* Validation of supported type versions. + +After type enumeration is complete, instance streams can be enumerated and interpreted. The size contained within the type description allows the reader to read in the entire type and then use field offsets to poke into that memory. The reading in of the entire data type helps with efficiency and acts as a versioning resilience mechanism. Adding new fields to a type, without changing the version, need not represent a breaking change. + +### Design FAQs + +--- + +**Q1** Why are streams allowed to grow? + +**A1** Consider the case where a data structure in the target process has a specific use case but the reader has either stricter or looser requirements. An example would be a thread pool used in the target process. This structure would ideally only be concerned with current threads in the target process, exited threads having been removed. However, the reader process likely has a need for knowing when a thread instance has exited to update its own internal state.
A possible solution is to fully query the thread pool data structure each time. However, if instead entries for created and deleted threads are recorded in a stream, the reader only needs to know the delta as opposed to querying the thread pool each time. The logic follows for any data structure that contains objects with transient lifetimes. + +--- + +**Q2** Why are the contents of a stream immutable? + +**A2** Having streams that are mutable means the reader _must_ always re-read the full stream to validate for updates. If the contents of a stream are instead immutable _and_ in reverse chronological order (LIFO), then entries for "deleted" or "invalidated" data are possible, which enables readers to consume deltas and reduce cross-process inspection. + +--- \ No newline at end of file From 2e40374f86199962f6b386e034285359d862cafc Mon Sep 17 00:00:00 2001 From: Elinor Fung Date: Fri, 8 Mar 2024 08:43:59 -0800 Subject: [PATCH 5/5] Fix x86 build --- src/native/datastream/data_stream.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/native/datastream/data_stream.c b/src/native/datastream/data_stream.c index 8cce39a3448f5..69544d84ed410 100644 --- a/src/native/datastream/data_stream.c +++ b/src/native/datastream/data_stream.c @@ -341,7 +341,7 @@ EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_define_type( curr += sizeof(total_size); memcpy(curr, offsets, offsets_bytes); curr += offsets_bytes; - assert(entry->offset_next == (curr - (uint8_t*)entry)); + assert(entry->offset_next == (uint32_t)(curr - (uint8_t*)entry)); return true; } @@ -375,7 +375,7 @@ EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_record_instance( curr += sizeof(type); memcpy(curr, &inst, sizeof(inst)); curr += sizeof(inst); - assert(entry->offset_next == (curr - (uint8_t*)entry)); + assert(entry->offset_next == (uint32_t)(curr - (uint8_t*)entry)); return true; } @@ -403,7 +403,7 @@ EXTERN_C DATA_STREAM_EXPORT ds_bool dnds_record_blob( curr += sizeof(size); memcpy(curr, inst, size); curr 
+= size; - assert(entry->offset_next == (curr - (uint8_t*)entry)); + assert(entry->offset_next == (uint32_t)(curr - (uint8_t*)entry)); return true; }