diff --git a/docs/design/datacontracts/contract-descriptor.md b/docs/design/datacontracts/contract-descriptor.md index 1e3ddabd6dd73..e63ae11b4ae93 100644 --- a/docs/design/datacontracts/contract-descriptor.md +++ b/docs/design/datacontracts/contract-descriptor.md @@ -24,9 +24,9 @@ struct DotNetRuntimeContractDescriptor uint32_t flags; uint32_t descriptor_size; const char *descriptor; - uint32_t aux_data_count; + uint32_t pointer_data_count; uint32_t pad0; - uintptr_t *aux_data; + uintptr_t *pointer_data; }; ``` diff --git a/docs/design/datacontracts/data/empty.jsonc b/docs/design/datacontracts/data/empty.jsonc new file mode 100644 index 0000000000000..29d15882a36b8 --- /dev/null +++ b/docs/design/datacontracts/data/empty.jsonc @@ -0,0 +1,4 @@ +// the empty baseline data descriptor +{ + "version": 0 +} diff --git a/eng/Subsets.props b/eng/Subsets.props index 9de503dd5002f..0a51b12649d1e 100644 --- a/eng/Subsets.props +++ b/eng/Subsets.props @@ -360,6 +360,8 @@ + + diff --git a/src/coreclr/debug/runtimeinfo/CMakeLists.txt b/src/coreclr/debug/runtimeinfo/CMakeLists.txt index e6d45ada12013..2a4cfc2dca3ff 100644 --- a/src/coreclr/debug/runtimeinfo/CMakeLists.txt +++ b/src/coreclr/debug/runtimeinfo/CMakeLists.txt @@ -37,3 +37,46 @@ endif() # publish runtimeinfo lib install_clr(TARGETS runtimeinfo DESTINATIONS lib COMPONENT runtime) + +add_library(cdac_data_descriptor OBJECT datadescriptor.cpp) +# don't build the data descriptor before the VM (and any of its dependencies' generated headers) +add_dependencies(cdac_data_descriptor cee_wks_core) +if(CLR_CMAKE_TARGET_WIN32) + # turn off whole program optimization: + # 1. it creates object files that cdac-build-tool can't read + # 2. 
we never link cdac_data_descriptor into the final product - its only job is to be scraped + target_compile_options(cdac_data_descriptor PRIVATE /GL-) +endif() +target_include_directories(cdac_data_descriptor BEFORE PRIVATE ${VM_DIR}) +target_include_directories(cdac_data_descriptor BEFORE PRIVATE ${VM_DIR}/${ARCH_SOURCES_DIR}) +target_include_directories(cdac_data_descriptor PRIVATE ${CLR_DIR}/interop/inc) + +set(GENERATED_CDAC_DESCRIPTOR_DIR "${CMAKE_CURRENT_BINARY_DIR}/cdac") +set(CONTRACT_DESCRIPTOR_OUTPUT "${GENERATED_CDAC_DESCRIPTOR_DIR}/contract-descriptor.c") +if("${CDAC_BUILD_TOOL_BINARY_PATH}" STREQUAL "" OR NOT EXISTS "${CDAC_BUILD_TOOL_BINARY_PATH}") + message(FATAL_ERROR "No cdac-build-tool set or ${CDAC_BUILD_TOOL_BINARY_PATH} does not exist") +endif() + +set(CONTRACT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/contracts.jsonc") + +# generate the contract descriptor by running cdac-build-tool +# n.b. this just uses `dotnet` from the PATH. InitializeDotNetCli adds the appropriate directory +add_custom_command( + OUTPUT "${CONTRACT_DESCRIPTOR_OUTPUT}" + VERBATIM + COMMAND dotnet ${CDAC_BUILD_TOOL_BINARY_PATH} compose -o "${CONTRACT_DESCRIPTOR_OUTPUT}" -c "${CONTRACT_FILE}" $ + DEPENDS cdac_data_descriptor cee_wks_core $ "${CONTRACT_FILE}" + USES_TERMINAL +) + +# It is important that cdac_contract_descriptor is an object library; +# if it was static, linking it into the final dll would not export +# DotNetRuntimeContractDescriptor since it is not referenced anywhere. 
+add_library_clr(cdac_contract_descriptor OBJECT + "${CONTRACT_DESCRIPTOR_OUTPUT}" + contractpointerdata.cpp +) +target_include_directories(cdac_contract_descriptor BEFORE PRIVATE ${VM_DIR}) +target_include_directories(cdac_contract_descriptor BEFORE PRIVATE ${VM_DIR}/${ARCH_SOURCES_DIR}) +target_include_directories(cdac_contract_descriptor PRIVATE ${CLR_DIR}/interop/inc) +add_dependencies(cdac_contract_descriptor cdac_data_descriptor cee_wks_core) diff --git a/src/coreclr/debug/runtimeinfo/contractpointerdata.cpp b/src/coreclr/debug/runtimeinfo/contractpointerdata.cpp new file mode 100644 index 0000000000000..ae1440af4219a --- /dev/null +++ b/src/coreclr/debug/runtimeinfo/contractpointerdata.cpp @@ -0,0 +1,23 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +#include +#include + +#include "threads.h" + +extern "C" +{ + +// without an extern declaration, clang does not emit this global into the object file +extern const uintptr_t contractDescriptorPointerData[]; + +const uintptr_t contractDescriptorPointerData[] = { + (uintptr_t)0, // placeholder +#define CDAC_GLOBAL_POINTER(name,value) (uintptr_t)(value), +#include "datadescriptor.h" +}; + +} diff --git a/src/coreclr/debug/runtimeinfo/contracts.jsonc b/src/coreclr/debug/runtimeinfo/contracts.jsonc new file mode 100644 index 0000000000000..186230d5c68d6 --- /dev/null +++ b/src/coreclr/debug/runtimeinfo/contracts.jsonc @@ -0,0 +1,14 @@ +//algorithmic contracts for coreclr +// The format of this file is: JSON with comments +// { +// "CONTRACT NAME": VERSION, +// ... 
+// } +// CONTRACT NAME is an arbitrary string, VERSION is an integer +// +// cdac-build-tool can take multiple "-c contract_file" arguments +// so to conditionally include contracts, put additional contracts in a separate file +{ + "SOSBreakingChangeVersion": 1 // example contract: "runtime exports an SOS breaking change version global" +} + diff --git a/src/coreclr/debug/runtimeinfo/datadescriptor.cpp b/src/coreclr/debug/runtimeinfo/datadescriptor.cpp new file mode 100644 index 0000000000000..99fe1cca7eeca --- /dev/null +++ b/src/coreclr/debug/runtimeinfo/datadescriptor.cpp @@ -0,0 +1,297 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +#include +#include + +#include "static_assert.h" + +#include +#include "threads.h" + +// begin blob definition + +extern "C" +{ + +struct TypeSpec +{ + uint32_t Name; + uint32_t Fields; + uint16_t Size; // note: C++ fragile no designated initializers - Size must come after Name and Fields +}; + +struct FieldSpec +{ + uint32_t Name; + uint32_t TypeName; + uint16_t FieldOffset; +}; + +struct GlobalLiteralSpec +{ + uint32_t Name; + uint32_t TypeName; + uint64_t Value; +}; + +struct GlobalPointerSpec +{ + uint32_t Name; + uint32_t PointerDataIndex; +}; + +#define CONCAT(token1,token2) token1 ## token2 +#define CONCAT4(token1, token2, token3, token4) token1 ## token2 ## token3 ## token4 + +#define MAKE_TYPELEN_NAME(tyname) CONCAT(cdac_string_pool_typename__, tyname) +#define MAKE_FIELDLEN_NAME(tyname,membername) CONCAT4(cdac_string_pool_membername__, tyname, __, membername) +#define MAKE_FIELDTYPELEN_NAME(tyname,membername) CONCAT4(cdac_string_pool_membertypename__, tyname, __, membername) +#define MAKE_GLOBALLEN_NAME(globalname) CONCAT(cdac_string_pool_globalname__, globalname) +#define MAKE_GLOBALTYPELEN_NAME(globalname) CONCAT(cdac_string_pool_globaltypename__, globalname) + +// define a struct where the size of 
each field is the length of some string. we will use offsetof to get +// the offset of each struct element, which will be equal to the offset of the beginning of that string in the +// string pool. +struct CDacStringPoolSizes +{ + char cdac_string_pool_nil; // make the first real string start at offset 1 +#define DECL_LEN(membername,len) char membername[(len)]; +#define CDAC_BASELINE(name) DECL_LEN(cdac_string_pool_baseline_, (sizeof(name))) +#define CDAC_TYPE_BEGIN(name) DECL_LEN(MAKE_TYPELEN_NAME(name), sizeof(#name)) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) DECL_LEN(MAKE_FIELDLEN_NAME(tyname,membername), sizeof(#membername)) \ + DECL_LEN(MAKE_FIELDTYPELEN_NAME(tyname,membername), sizeof(#membertyname)) +#define CDAC_GLOBAL_POINTER(name,value) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) +#define CDAC_GLOBAL(name,tyname,value) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) \ + DECL_LEN(MAKE_GLOBALTYPELEN_NAME(name), sizeof(#tyname)) +#include "datadescriptor.h" + char cdac_string_pool_trailing_nil; +#undef DECL_LEN +}; + +#define GET_TYPE_NAME(name) offsetof(struct CDacStringPoolSizes, MAKE_TYPELEN_NAME(name)) +#define GET_FIELD_NAME(tyname,membername) offsetof(struct CDacStringPoolSizes, MAKE_FIELDLEN_NAME(tyname,membername)) +#define GET_FIELDTYPE_NAME(tyname,membername) offsetof(struct CDacStringPoolSizes, MAKE_FIELDTYPELEN_NAME(tyname,membername)) +#define GET_GLOBAL_NAME(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALLEN_NAME(globalname)) +#define GET_GLOBALTYPE_NAME(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALTYPELEN_NAME(globalname)) + +// count the types +enum +{ + CDacBlobTypesCount = +#define CDAC_TYPES_BEGIN() 0 +#define CDAC_TYPE_BEGIN(name) + 1 +#include "datadescriptor.h" +}; + +// count the field pool size. 
+// there's 1 placeholder element at the start, and 1 endmarker after each type +enum +{ + CDacBlobFieldsPoolCount = +#define CDAC_TYPES_BEGIN() 1 +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) + 1 +#define CDAC_TYPE_END(name) + 1 +#include "datadescriptor.h" +}; + +// count the literal globals +enum +{ + CDacBlobGlobalLiteralsCount = +#define CDAC_GLOBALS_BEGIN() 0 +#define CDAC_GLOBAL(name,tyname,value) + 1 +#include "datadescriptor.h" +}; + +// count the pointer globals +enum +{ + CDacBlobGlobalPointersCount = +#define CDAC_GLOBALS_BEGIN() 0 +#define CDAC_GLOBAL_POINTER(name,value) + 1 +#include "datadescriptor.h" +}; + + +#define MAKE_TYPEFIELDS_TYNAME(tyname) CONCAT(CDacFieldsPoolTypeStart__, tyname) + +// index of each run of fields. +// we make a struct containing one 1-byte field for each field in the run, and then take the offset of the +// struct to get the index of the run of fields. +// this looks like +// +// struct CDacFieldsPoolSizes { +// char cdac_fields_pool_start_placeholder__; +// struct CDacFieldsPoolTypeStart__MethodTable { +// char cdac_fields_pool_member__MethodTable__GCHandle; +// char cdac_fields_pool_member__MethodTable_endmarker; +// } CDacFieldsPoolTypeStart__MethodTable; +// ... 
+// }; +// +// so that offsetof(struct CDacFieldsPoolSizes, CDacFieldsPoolTypeStart__MethodTable) will give the offset of the +// method table field descriptors in the run of fields +struct CDacFieldsPoolSizes +{ +#define DECL_LEN(membername) char membername; +#define CDAC_TYPES_BEGIN() DECL_LEN(cdac_fields_pool_start_placeholder__) +#define CDAC_TYPE_BEGIN(name) struct MAKE_TYPEFIELDS_TYNAME(name) { +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) DECL_LEN(CONCAT4(cdac_fields_pool_member__, tyname, __, membername)) +#define CDAC_TYPE_END(name) DECL_LEN(CONCAT4(cdac_fields_pool_member__, tyname, _, endmarker)) \ + } MAKE_TYPEFIELDS_TYNAME(name); +#include "datadescriptor.h" +#undef DECL_LEN +}; + +#define GET_TYPE_FIELDS(tyname) offsetof(struct CDacFieldsPoolSizes, MAKE_TYPEFIELDS_TYNAME(tyname)) + +// index of each global pointer +// +// struct CDacGlobalPointerIndex +// { +// char placeholder; +// char firstGlobalPointerName; +// char secondGlobalPointerName; +// ... +//} +// +// offsetof (CDACGlobalPointerIndex, NAME) returns the index of the global +struct CDacGlobalPointerIndex +{ +#define DECL_LEN(membername) char membername; +#define CDAC_GLOBALS_BEGIN() DECL_LEN(cdac_global_pointer_index_start_placeholder__) +#define CDAC_GLOBAL_POINTER(name,value) DECL_LEN(CONCAT(cdac_global_pointer_index__, name)) +#include "datadescriptor.h" +#undef DECL_LEN +}; + +#define GET_GLOBAL_POINTER_INDEX(name) offsetof(struct CDacGlobalPointerIndex, CONCAT(cdac_global_pointer_index__, name)) + +struct BinaryBlobDataDescriptor +{ + // The blob begins with a directory that gives the relative offsets of the `Baseline`, `Types`, + // `FieldsPool`, `GlobalLiteralValues`, `GlobalPointerValues` and `Names` fields of the blob. + // The number of elements of each of the arrays is next. This is followed by the sizes of the + // spec structs. 
Since `BinaryBlobDataDescriptor` is created via macros, we want to embed the + // `offsetof` and `sizeof` of the components of the blob into the blob itself without having to + // account for any padding that the C/C++ compiler may introduce to enforce alignment. + // Additionally the `Directory` tries to follow a common C/C++ alignment rule (we don't want + // padding introduced in the directory itself): N-byte members are aligned to start on N-byte + // boundaries. + struct Directory { + uint32_t FlagsAndBaselineStart; + uint32_t TypesStart; + + uint32_t FieldsPoolStart; + uint32_t GlobalLiteralValuesStart; + + uint32_t GlobalPointersStart; + uint32_t NamesPoolStart; + + uint32_t TypeCount; + uint32_t FieldsPoolCount; + + uint32_t GlobalLiteralValuesCount; + uint32_t GlobalPointerValuesCount; + + uint32_t NamesPoolCount; + + uint8_t TypeSpecSize; + uint8_t FieldSpecSize; + uint8_t GlobalLiteralSpecSize; + uint8_t GlobalPointerSpecSize; + } Directory; + uint32_t PlatformFlags; + uint32_t BaselineName; + struct TypeSpec Types[CDacBlobTypesCount]; + struct FieldSpec FieldsPool[CDacBlobFieldsPoolCount]; + struct GlobalLiteralSpec GlobalLiteralValues[CDacBlobGlobalLiteralsCount]; + struct GlobalPointerSpec GlobalPointerValues[CDacBlobGlobalPointersCount]; + uint8_t NamesPool[sizeof(struct CDacStringPoolSizes)]; + uint8_t EndMagic[4]; +}; + +struct MagicAndBlob { + uint64_t magic; + struct BinaryBlobDataDescriptor Blob; +}; + +// we only support 32-bit and 64-bit right now +static_assert_no_msg(sizeof(void*) == 4 || sizeof(void*) == 8); + +// C-style designated initializers are a C++20 feature. Have to use plain old aggregate initialization instead. 
+ +DLLEXPORT +struct MagicAndBlob BlobDataDescriptor = { + /*.magic = */ 0x00424F4C42434144ull,// "DACBLOB", + /*.Blob =*/ { + /*.Directory =*/ { + /* .FlagsAndBaselineStart = */ offsetof(struct BinaryBlobDataDescriptor, PlatformFlags), + /* .TypesStart = */ offsetof(struct BinaryBlobDataDescriptor, Types), + /* .FieldsPoolStart = */ offsetof(struct BinaryBlobDataDescriptor, FieldsPool), + /* .GlobalLiteralValuesStart = */ offsetof(struct BinaryBlobDataDescriptor, GlobalLiteralValues), + /* .GlobalPointersStart = */ offsetof(struct BinaryBlobDataDescriptor, GlobalPointerValues), + /* .NamesPoolStart = */ offsetof(struct BinaryBlobDataDescriptor, NamesPool), + /* .TypeCount = */ CDacBlobTypesCount, + /* .FieldsPoolCount = */ CDacBlobFieldsPoolCount, + /* .GlobalLiteralValuesCount = */ CDacBlobGlobalLiteralsCount, + /* .GlobalPointerValuesCount = */ CDacBlobGlobalPointersCount, + /* .NamesPoolCount = */ sizeof(struct CDacStringPoolSizes), + /* .TypeSpecSize = */ sizeof(struct TypeSpec), + /* .FieldSpecSize = */ sizeof(struct FieldSpec), + /* .GlobalLiteralSpecSize = */ sizeof(struct GlobalLiteralSpec), + /* .GlobalPointerSpecSize = */ sizeof(struct GlobalPointerSpec), + }, + /* .PlatformFlags = */ (sizeof(void*) == 4 ? 
0x02 : 0) | 0x01, + /* .BaselineName = */ offsetof(struct CDacStringPoolSizes, cdac_string_pool_baseline_), + + /* .Types = */ { +#define CDAC_TYPE_BEGIN(name) { \ + /* .Name = */ GET_TYPE_NAME(name), \ + /* .Fields = */ GET_TYPE_FIELDS(name), +#define CDAC_TYPE_INDETERMINATE(name) /*.Size = */ 0, +#define CDAC_TYPE_SIZE(size) /* .Size = */ size, +#define CDAC_TYPE_END(name) }, +#include "datadescriptor.h" + }, + + /* .FieldsPool = */ { +#define CDAC_TYPES_BEGIN() {0,}, +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) { \ + /* .Name = */ GET_FIELD_NAME(tyname,membername), \ + /* .TypeName = */ GET_FIELDTYPE_NAME(tyname,membername), \ + /* .FieldOffset = */ offset, \ +}, +#define CDAC_TYPE_END(name) { 0, }, +#include "datadescriptor.h" + }, + + /* .GlobalLiteralValues = */ { +#define CDAC_GLOBAL(name,tyname,value) { /*.Name = */ GET_GLOBAL_NAME(name), /* .TypeName = */ GET_GLOBALTYPE_NAME(name), /* .Value = */ value }, +#include "datadescriptor.h" + }, + + /* .GlobalPointerValues = */ { +#define CDAC_GLOBAL_POINTER(name,value) { /* .Name = */ GET_GLOBAL_NAME(name), /* .PointerDataIndex = */ GET_GLOBAL_POINTER_INDEX(name) }, +#include "datadescriptor.h" + }, + + /* .NamesPool = */ ("\0" // starts with a nul +#define CDAC_BASELINE(name) name "\0" +#define CDAC_TYPE_BEGIN(name) #name "\0" +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) #membername "\0" #membertyname "\0" +#define CDAC_GLOBAL_POINTER(name,value) #name "\0" +#define CDAC_GLOBAL(name,tyname,value) #name "\0" #tyname "\0" +#include "datadescriptor.h" + ), + + /* .EndMagic = */ { 0x01, 0x02, 0x03, 0x04 }, + } +}; + +// end blob definition + +} // extern "C" diff --git a/src/coreclr/debug/runtimeinfo/datadescriptor.h b/src/coreclr/debug/runtimeinfo/datadescriptor.h new file mode 100644 index 0000000000000..b5ab51774e121 --- /dev/null +++ b/src/coreclr/debug/runtimeinfo/datadescriptor.h @@ -0,0 +1,139 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// +// No include guards. This file is included multiple times. + +// The format is: +// CDAC_BASELINE("string") baseline data contract that the runtime should follow. "empty" is reasonable +// CDAC_TYPES_BEGIN() +// ... type definitions ... +// CDAC_TYPES_END() +// CDAC_GLOBALS_BEGIN() +// ... global definitions ... +// CDAC_GLOBALS_END() +// +// In the types section, the format is: +// CDAC_TYPE_BEGIN(cdacTypeIdentifier) // defines a new data descriptor type named cdacTypeIdentifier +// +// CDAC_TYPE_SIZE(k) -or- CDAC_TYPE_INDETERMINATE(cdacTypeIdentifier) specifies that the type has +// size k (bytes - usually sizeof(SomeNativeType)) or specifies that the type's size is not provided +// It is important that CDAC_TYPE_SIZE or CDAC_TYPE_INDETERMINATE immediately follows +// CDAC_TYPE_BEGIN +// +// CDAC_TYPE_FIELD(cdacTypeIdentifier, cdacFieldTypeIdentifier, cdacFieldName, k) specifies the +// field of "cdacTypeIdentifier" that has name cdacFieldName and has the type +// "cdacFieldTypeIdentifier" located at offset k in the type layout. 
k is usually +// offsetof(SomeClass, m_FieldName) if the field is public +// +// if the field is private, the convention is that SomeClass declares a friend struct +// cdac_offsets and provides a specialization of cdac_offsets with a public constexpr +// size_t member that holds the offset: +// +// class MyClass { +// private: +// void* m_myField; +// template<typename T> friend struct cdac_offsets; +// }; +// template<> struct cdac_offsets<MyClass> { +// static constexpr size_t MyField = offsetof(MyClass, m_myField); +// }; +// +// then the field layout can be specified as +// CDAC_TYPE_FIELD(MyClassLayout, pointer, MyField, cdac_offsets<MyClass>::MyField) +// There can be zero or more CDAC_TYPE_FIELD entries per type layout +// +// CDAC_TYPE_END(cdacTypeIdentifier) specifies the end of the type layout for cdacTypeIdentifier +// +// In the globals section, the format is: +// +// CDAC_GLOBAL(cdacGlobalName, cdacTypeIdentifier, value) +// or +// CDAC_GLOBAL_POINTER(cdacGlobalName, address) +// +// Zero or more globals can be defined +// +// if a global is given with CDAC_GLOBAL(), `value` should be a constexpr uint64_t (or convertible +// to uint64_t); for example, it can be a literal constant or a preprocessor definition +// +// if a global is a CDAC_GLOBAL_POINTER(), address should be a constexpr pointer or a constexpr +// uintptr_t +// +// +// +// This file is compiled using the target architecture. Preprocessor defines for the target +// platform will be available. It is ok to use `#ifdef`. 
+ +#ifndef CDAC_BASELINE +#define CDAC_BASELINE(identifier) +#endif +#ifndef CDAC_TYPES_BEGIN +#define CDAC_TYPES_BEGIN() +#endif +#ifndef CDAC_TYPE_BEGIN +#define CDAC_TYPE_BEGIN(tyname) +#endif +#ifndef CDAC_TYPE_SIZE +#define CDAC_TYPE_SIZE(k) +#endif +#ifndef CDAC_TYPE_INDETERMINATE +#define CDAC_TYPE_INDETERMINATE(tyname) +#endif +#ifndef CDAC_TYPE_FIELD +#define CDAC_TYPE_FIELD(tyname,fieldtyname,fieldname,off) +#endif +#ifndef CDAC_TYPE_END +#define CDAC_TYPE_END(tyname) +#endif +#ifndef CDAC_TYPES_END +#define CDAC_TYPES_END() +#endif +#ifndef CDAC_GLOBALS_BEGIN +#define CDAC_GLOBALS_BEGIN() +#endif +#ifndef CDAC_GLOBAL +#define CDAC_GLOBAL(globalname,tyname,val) +#endif +#ifndef CDAC_GLOBAL_POINTER +#define CDAC_GLOBAL_POINTER(globalname,addr) +#endif +#ifndef CDAC_GLOBALS_END +#define CDAC_GLOBALS_END() +#endif + +CDAC_BASELINE("empty") +CDAC_TYPES_BEGIN() + +CDAC_TYPE_BEGIN(ManagedThread) +CDAC_TYPE_INDETERMINATE(ManagedThread) +CDAC_TYPE_FIELD(ManagedThread, GCHandle, GCHandle, cdac_offsets::ExposedObject) +CDAC_TYPE_FIELD(ManagedThread, pointer, LinkNext, cdac_offsets::Link) +CDAC_TYPE_END(ManagedThread) + +CDAC_TYPE_BEGIN(GCHandle) +CDAC_TYPE_SIZE(sizeof(OBJECTHANDLE)) +CDAC_TYPE_END(GCHandle) + +CDAC_TYPES_END() + +CDAC_GLOBALS_BEGIN() +CDAC_GLOBAL_POINTER(ManagedThreadStore, &ThreadStore::s_pThreadStore) +#if FEATURE_EH_FUNCLETS +CDAC_GLOBAL(FeatureEHFunclets, uint8, 1) +#else +CDAC_GLOBAL(FeatureEHFunclets, uint8, 0) +#endif +CDAC_GLOBAL(SOSBreakingChangeVersion, uint8, SOS_BREAKING_CHANGE_VERSION) +CDAC_GLOBALS_END() + +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef CDAC_TYPES_END +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBALS_END diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index 
c600af1fb6aad..7ba58d0297f9a 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -109,6 +109,7 @@ set(CORECLR_LIBRARIES interop coreclrminipal gc_pal + cdac_contract_descriptor ) if(CLR_CMAKE_TARGET_ARCH_AMD64) diff --git a/src/coreclr/dlls/mscoree/mscorwks_ntdef.src b/src/coreclr/dlls/mscoree/mscorwks_ntdef.src index 0ac421b63e071..a2076bd62433c 100644 --- a/src/coreclr/dlls/mscoree/mscorwks_ntdef.src +++ b/src/coreclr/dlls/mscoree/mscorwks_ntdef.src @@ -28,3 +28,6 @@ EXPORTS ; Used by profilers MetaDataGetDispenser + + ; cDAC contract descriptor + DotNetRuntimeContractDescriptor diff --git a/src/coreclr/dlls/mscoree/mscorwks_unixexports.src b/src/coreclr/dlls/mscoree/mscorwks_unixexports.src index a35a59c095604..3eacb7fa48485 100644 --- a/src/coreclr/dlls/mscoree/mscorwks_unixexports.src +++ b/src/coreclr/dlls/mscoree/mscorwks_unixexports.src @@ -14,3 +14,6 @@ g_dacTable ; Used by profilers MetaDataGetDispenser + +; cDAC contract descriptor +DotNetRuntimeContractDescriptor diff --git a/src/coreclr/runtime-prereqs.proj b/src/coreclr/runtime-prereqs.proj index 6bbe50f7d550a..91e3f46d59d5f 100644 --- a/src/coreclr/runtime-prereqs.proj +++ b/src/coreclr/runtime-prereqs.proj @@ -15,6 +15,7 @@ + diff --git a/src/coreclr/runtime.proj b/src/coreclr/runtime.proj index 773b0290d523f..c231ebd07a80e 100644 --- a/src/coreclr/runtime.proj +++ b/src/coreclr/runtime.proj @@ -50,6 +50,7 @@ <_CoreClrBuildArg Condition="'$(HostCrossOS)' != ''" Include="-hostos $(HostCrossOS)" /> <_CoreClrBuildArg Include="-outputrid $(OutputRID)" /> <_CoreClrBuildArg Condition="'$(BuildSubdirectory)' != ''" Include="-subdir $(BuildSubdirectory)" /> + <_CoreClrBuildArg Include="-cmakeargs "-DCDAC_BUILD_TOOL_BINARY_PATH=$(RuntimeBinDir)cdac-build-tool\cdac-build-tool.dll"" /> diff --git a/src/coreclr/tools/cdac-build-tool/ComposeCommand.cs b/src/coreclr/tools/cdac-build-tool/ComposeCommand.cs new file mode 100644 index 
0000000000000..226244a88872a --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/ComposeCommand.cs @@ -0,0 +1,91 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.CommandLine; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +internal sealed class ComposeCommand : CliCommand +{ + private readonly CliArgument inputFiles = new("INPUT [INPUTS...]") { Arity = ArgumentArity.OneOrMore, Description = "One or more input files" }; + private readonly CliOption outputFile = new("-o") { Arity = ArgumentArity.ExactlyOne, HelpName = "OUTPUT", Required = true, Description = "Output file" }; + private readonly CliOption contractFile = new("-c") { Arity = ArgumentArity.ZeroOrMore, HelpName = "CONTRACT", Description = "Contract file (may be specified multiple times)" }; + private readonly CliOption _verboseOption; + public ComposeCommand(CliOption verboseOption) : base("compose") + { + _verboseOption = verboseOption; + Add(inputFiles); + Add(outputFile); + Add(contractFile); + SetAction(Run); + } + + private async Task Run(ParseResult parse, CancellationToken token = default) + { + var inputs = parse.GetValue(inputFiles); + if (inputs == null || inputs.Length == 0) + { + Console.Error.WriteLine("No input files specified"); + return 1; + } + var output = parse.GetValue(outputFile); + if (output == null) + { + Console.Error.WriteLine("No output file specified"); + return 1; + } + var contracts = parse.GetValue(contractFile); + var verbose = parse.GetValue(_verboseOption); + var builder = new DataDescriptorModel.Builder(); + var scraper = new ObjectFileScraper(verbose, builder); + foreach (var input in inputs) + { + token.ThrowIfCancellationRequested(); + if (!await scraper.ScrapeInput(input, token).ConfigureAwait(false)) + { + Console.Error.WriteLine($"could not scrape payload in 
{input}"); + return 1; + } + } + if (contracts != null) + { + var contractReader = new ContractReader(builder); + foreach (var contract in contracts) + { + if (!await contractReader.ParseContracts(contract, token).ConfigureAwait(false)) + { + Console.Error.WriteLine($"could not parse contracts in {contract}"); + return 1; + } + } + } + + var model = builder.Build(); + if (verbose) + { + model.DumpModel(); + } + EnsureDirectoryExists(output); + using var writer = new System.IO.StreamWriter(output); + var emitter = new ContractDescriptorSourceFileEmitter(); + emitter.SetPlatformFlags(model.PlatformFlags); + emitter.SetPointerDataCount(model.PointerDataCount); + emitter.SetJsonDescriptor(model.ToJson()); + emitter.Emit(writer); + await writer.FlushAsync(token).ConfigureAwait(false); + return 0; + } + + private static void EnsureDirectoryExists(string outputPath) + { + var directory = System.IO.Path.GetDirectoryName(outputPath); + if (directory == null) + { + return; + } + System.IO.Directory.CreateDirectory(directory); + } +} diff --git a/src/coreclr/tools/cdac-build-tool/ContractDescriptorSourceFileEmitter.cs b/src/coreclr/tools/cdac-build-tool/ContractDescriptorSourceFileEmitter.cs new file mode 100644 index 0000000000000..dde27a6858c6d --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/ContractDescriptorSourceFileEmitter.cs @@ -0,0 +1,81 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text.RegularExpressions; + +namespace Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +public partial class ContractDescriptorSourceFileEmitter +{ + public const string TemplateResourceName = "Microsoft.DotNet.Diagnostics.DataContract.Resources.contract-descriptor.c.in"; + private const string JsonDescriptorKey = "jsonDescriptor"; + private const string JsonDescriptorSizeKey = "jsonDescriptorSize"; + private const string PointerDataCount = "pointerDataCount"; + private const string PlatformFlags = "platformFlags"; + + + [GeneratedRegex("%%([a-zA-Z0-9_]+)%%", RegexOptions.CultureInvariant)] + private static partial Regex FindTemplatePlaceholderRegex(); + + internal static Stream GetTemplateStream() + { + return typeof(ContractDescriptorSourceFileEmitter).Assembly.GetManifestResourceStream(TemplateResourceName)!; + } + + internal static string GetTemplateString() + { + using var reader = new StreamReader(GetTemplateStream(), System.Text.Encoding.UTF8); + return reader.ReadToEnd(); + } + + public void SetPointerDataCount(int count) + { + Elements[PointerDataCount] = count.ToString(); + } + + public void SetPlatformFlags(uint platformFlags) + { + Elements[PlatformFlags] = $"0x{platformFlags:x8}"; + } + + /// The jsonDescriptor should not be C escaped + public void SetJsonDescriptor(string jsonDescriptor) + { + var count = jsonDescriptor.Length; // return the length before escaping + var escaped = CStringEscape().Replace(jsonDescriptor, "\\$1"); + Elements[JsonDescriptorKey] = escaped; + Elements[JsonDescriptorSizeKey] = count.ToString(); + } + + [GeneratedRegex("(\")", RegexOptions.CultureInvariant)] + private static partial Regex CStringEscape(); + + public Dictionary Elements { get; } = new(); + + public void Emit(TextWriter dest) + { + var template = GetTemplateString(); + var matches = FindTemplatePlaceholderRegex().Matches(template); + var prevPos = 0; + foreach (Match match 
in matches) + { + // copy everything from the end of the last match (prevPos) to just before the current match to the output + dest.Write(template.AsSpan(prevPos, match.Index - prevPos)); + + // lookup the capture key and write it out + + var key = match.Groups[1].Captures[0].Value; + if (!Elements.TryGetValue(key, out string? result)) + { + throw new InvalidOperationException ($"no replacement for {key}"); + } + dest.Write(result); + prevPos = match.Index + match.Length; + } + // write everything from the prevPos to the end of the template + dest.Write(template.AsSpan(prevPos)); + } +} diff --git a/src/coreclr/tools/cdac-build-tool/ContractReader.cs b/src/coreclr/tools/cdac-build-tool/ContractReader.cs new file mode 100644 index 0000000000000..3bd8ec0c77e7f --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/ContractReader.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +public class ContractReader +{ + private readonly DataDescriptorModel.Builder _builder; + + private static readonly JsonSerializerOptions s_jsonSerializerOptions = new() { PropertyNameCaseInsensitive = false, ReadCommentHandling = JsonCommentHandling.Skip }; + + public ContractReader(DataDescriptorModel.Builder builder) + { + _builder = builder; + } + + public async Task ParseContracts(string contractFilePath, CancellationToken token = default) + { + string? 
contents = await File.ReadAllTextAsync(contractFilePath, token).ConfigureAwait(false); + var contracts = JsonSerializer.Deserialize>(contents, s_jsonSerializerOptions); + if (contracts is null) + return false; + _builder.AddOrupdateContracts(contracts); + return true; + } +} diff --git a/src/coreclr/tools/cdac-build-tool/DataDescriptorModel.cs b/src/coreclr/tools/cdac-build-tool/DataDescriptorModel.cs new file mode 100644 index 0000000000000..3abeb5c56e83c --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/DataDescriptorModel.cs @@ -0,0 +1,389 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.DotNet.Diagnostics.DataContract.JsonConverter; + +namespace Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +public class DataDescriptorModel +{ + public int Version => 0; + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)] + public string Baseline { get; } + public IReadOnlyDictionary Types { get; } + public IReadOnlyDictionary Globals { get; } + public IReadOnlyDictionary Contracts { get; } + [JsonIgnore] + public uint PlatformFlags { get; } + // The number of indirect globals plus 1 for the placeholder at index 0 + [JsonIgnore] + public int PointerDataCount => 1 + Globals.Values.Count(g => g.Value.Indirect); + + private DataDescriptorModel(string baseline, IReadOnlyDictionary types, IReadOnlyDictionary globals, IReadOnlyDictionary contracts, uint platformFlags) + { + Baseline = baseline; + Types = types; + Globals = globals; + Contracts = contracts; + PlatformFlags = platformFlags; + } + + public const string PointerTypeName = "pointer"; + + internal void DumpModel() + { + Console.WriteLine("\nData Descriptor Model:"); + Console.WriteLine($"Platform Flags: 
0x{PlatformFlags:x8}"); + Console.WriteLine($"Baseline: {Baseline}"); + foreach (var (typeName, type) in Types) + { + Console.WriteLine($"Type: {typeName}"); + if (type.Size != null) + { + Console.WriteLine($" Size: 0x{type.Size:x8}"); + } + foreach (var (fieldName, field) in type.Fields) + { + Console.WriteLine($" Field: {fieldName}"); + Console.WriteLine($" Type: {field.Type}"); + Console.WriteLine($" Offset: 0x{field.Offset:x8}"); + } + } + foreach (var (globalName, global) in Globals) + { + Console.WriteLine($"Global: {globalName}"); + Console.WriteLine($" Type: {global.Type}"); + Console.WriteLine($" Value: {global.Value}"); + } + foreach (var (contractName, contract) in Contracts) + { + Console.WriteLine($"Contract: {contractName}"); + Console.WriteLine($" Version: {contract}"); + } + } + + private static JsonSerializerOptions s_jsonSerializerOptions = new JsonSerializerOptions + { + WriteIndented = false, + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + DictionaryKeyPolicy = null, // leave unchanged + }; + public string ToJson() + { + // always writes the "compact" format, see data_descriptor.md + return JsonSerializer.Serialize(this, s_jsonSerializerOptions); + } + + public class Builder + { + private string _baseline; + private bool _baselineParsed; + private readonly Dictionary _types = new(); + private readonly Dictionary _globals = new(); + private readonly Dictionary _contracts = new(); + public Builder() + { + _baseline = string.Empty; + _baselineParsed = false; + } + + public uint PlatformFlags {get; set;} + + public TypeModelBuilder AddOrUpdateType(string name, int? size) + { + if (!_baselineParsed) + { + throw new InvalidOperationException("Baseline must be set before adding types"); + } + if (!_types.TryGetValue(name, out var type)) + { + type = new TypeModelBuilder(); + _types[name] = type; + } + type.Size = size; + return type; + } + + public GlobalBuilder AddOrUpdateGlobal(string name, string type, GlobalValue? 
value) + { + if (!_baselineParsed) + { + throw new InvalidOperationException("Baseline must be set before adding globals"); + } + if (!_globals.TryGetValue(name, out var global)) + { + global = new GlobalBuilder(); + _globals[name] = global; + } + global.Type = type; + global.Value = value; + return global; + } + + public void AddOrUpdateContract(string name, int version) + { + if (!_contracts.TryGetValue(name, out var contract)) + { + contract = new ContractBuilder(); + _contracts[name] = contract; + } + contract.Version = version; + } + + public void AddOrupdateContracts(IEnumerable> contracts) + { + foreach (var (name, version) in contracts) + { + AddOrUpdateContract(name, version); + } + } + + public void SetBaseline(string baseline) + { + if (_baseline != string.Empty && _baseline != baseline) + { + throw new InvalidOperationException($"Baseline already set to {_baseline} cannot set to {baseline}"); + } + if (EmbeddedBaselines.BaselineNames.Contains(baseline)) + { + _baseline = baseline; + } + else + { + throw new InvalidOperationException($"Baseline '{baseline}' not known"); + } + _baseline = baseline; + if (!_baselineParsed) + { + _baselineParsed = true; // kind of a hack - set it before parsing the baseline, so we can call AddOrUpdateType + ParseBaseline(); + } + } + + private void ParseBaseline() + { + if (_baseline != "empty") + { + throw new InvalidOperationException("TODO: [cdac] - implement baseline parsing"); + } + } + + public DataDescriptorModel Build() + { + var types = new Dictionary(); + foreach (var (typeName, typeBuilder) in _types) + { + types[typeName] = typeBuilder.Build(typeName); + } + var globals = new Dictionary(); + foreach (var (globalName, globalBuilder) in _globals) + { + if (globalBuilder.Type == string.Empty) + { + throw new InvalidOperationException($"Type must be set for global {globalName}"); + } + GlobalValue? 
v = globalBuilder.Value; + if (v == null) + { + throw new InvalidOperationException($"Value must be set for global {globalName}"); + } + globals[globalName] = new GlobalModel { Type = globalBuilder.Type, Value = v.Value }; + } + var contracts = new Dictionary(); + foreach (var (contractName, contractBuilder) in _contracts) + { + contracts[contractName] = contractBuilder.Build(); + } + return new DataDescriptorModel(_baseline, types, globals, contracts, PlatformFlags); + } + } + + public class TypeModelBuilder + { + private readonly Dictionary _fields = new(); + private int? _size; + public TypeModelBuilder() { } + + public int? Size + { + get => _size; + set + { + if (_size != null && (value == null || _size != (int)value)) + { + throw new InvalidOperationException($"Size already set to {_size} cannot set to {value}"); + } + _size = value; + } + } + + public void AddOrUpdateField(string name, string type, int? offset) + { + if (!_fields.TryGetValue(name, out var field)) + { + field = new FieldBuilder(); + _fields[name] = field; + } + field.Type = type; + field.Offset = offset; + } + + public TypeModel Build(string typeName) + { + var fields = new Dictionary(); + foreach (var (fieldName, fieldBuilder) in _fields) + { + fields.Add(fieldName, fieldBuilder.Build(typeName, fieldName)); + } + return new TypeModel { Size = _size, Fields = fields }; + } + } + + public class GlobalBuilder + { + private string _type = string.Empty; + private GlobalValue? _value; + public string Type + { + get => _type; + set + { + if (_type != string.Empty && _type != value) + { + throw new InvalidOperationException($"Type already set to {_type} cannot set to {value}"); + } + _type = value; + } + } + public GlobalValue? 
Value + { + get => _value; + set + { + if (_value != null && _value != value) + { + throw new InvalidOperationException($"Value already set to {_value} cannot set to {value}"); + } + _value = value; + } + } + } + internal sealed class FieldBuilder + { + private string _type = string.Empty; + private int? _offset; + public string Type + { + get => _type; + set + { + if (_type != string.Empty && _type != value) + { + throw new InvalidOperationException($"Type already set to {_type} cannot set to {value}"); + } + _type = value; + } + } + + public int? Offset + { + get => _offset; + set + { + if (_offset != null && (value == null || _offset != (int)value)) + { + throw new InvalidOperationException($"Offset already set to {_offset} cannot set to {value}"); + } + _offset = value; + } + } + + public FieldModel Build(string typeName, string fieldName) + { + if (_offset == null) + { + throw new InvalidOperationException($"Offset must be set for {typeName}.{fieldName}"); + } + return new FieldModel { Type = _type, Offset = (int)_offset }; + } + } + + [JsonConverter(typeof(FieldModelJsonConverter))] + public readonly struct FieldModel + { + public string Type { get; init; } + public int Offset { get; init; } + } + + [JsonConverter(typeof(TypeModelJsonConverter))] + public readonly struct TypeModel + { + public int? 
Size { get; init; } + public IReadOnlyDictionary Fields { get; init; } + } + + [JsonConverter(typeof(GlobalValueJsonConverter))] + public readonly struct GlobalValue : IEquatable + { + public bool Indirect { get; private init; } + public ulong Value { get; } + public static GlobalValue MakeDirect(ulong value) => new GlobalValue(value); + public static GlobalValue MakeIndirect(uint auxDataIdx) => new GlobalValue((ulong)auxDataIdx) { Indirect = true }; + private GlobalValue(ulong value) { Value = value; } + + public static bool operator ==(GlobalValue left, GlobalValue right) => left.Value == right.Value && left.Indirect == right.Indirect; + public static bool operator !=(GlobalValue left, GlobalValue right) => !(left == right); + + public bool Equals(GlobalValue other) => this == other; + public override bool Equals(object? obj) => obj is GlobalValue value && this == value; + public override int GetHashCode() => HashCode.Combine(Value, Indirect); + public override string ToString() => Indirect ? $"Indirect({Value})" : $"0x{Value:x}"; + } + + [JsonConverter(typeof(GlobalModelJsonConverter))] + public readonly struct GlobalModel + { + public string Type { get; init; } + public GlobalValue Value { get; init; } + } + + public class ContractBuilder + { + private int? _version; + public ContractBuilder() + { + } + + public int? Version + { + get => _version; + set + { + if (_version != null && _version != value) + { + throw new InvalidOperationException($"Version already set to {_version} cannot set to {value}"); + } + _version = value; + } + } + + // There is no ContractModel right now because the only info we keep is the version. + // As a result it is convenient to use a Dictionary for the contracts since + // the JSON serialization coincides with what we want. 
+ public int Build() + { + if (_version == null) + { + throw new InvalidOperationException("Version must be set for contract"); + } + return _version.Value; + } + } +} diff --git a/src/coreclr/tools/cdac-build-tool/Directory.Build.props b/src/coreclr/tools/cdac-build-tool/Directory.Build.props new file mode 100644 index 0000000000000..465a55954dfea --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/Directory.Build.props @@ -0,0 +1,7 @@ + + + + <_RequiresLiveILLink>false + + + diff --git a/src/coreclr/tools/cdac-build-tool/EmbeddedBaselines.cs b/src/coreclr/tools/cdac-build-tool/EmbeddedBaselines.cs new file mode 100644 index 0000000000000..ac9cea8b6615e --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/EmbeddedBaselines.cs @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Text.RegularExpressions; + +namespace Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +public partial class EmbeddedBaselines +{ + public const string TemplateResourceNamePrefix = "Microsoft.DotNet.Diagnostics.DataContract.Baseline:"; + public const string TemplateResourceNameEscapePrefix = @"Microsoft\.DotNet\.Diagnostics\.DataContract\.Baseline:"; + public const string TemplateResourceNameExt = ".jsonc"; + public const string TemplateResourceNameEscapeExt = @"\.jsonc"; + + [GeneratedRegex("^" + TemplateResourceNameEscapePrefix + "(.+)" + TemplateResourceNameEscapeExt + "$", RegexOptions.CultureInvariant)] + private static partial Regex BaselineRegex(); + + private static string[] GetBaselineNames() + { + var assembly = typeof(EmbeddedBaselines).Assembly; + var resources = assembly.GetManifestResourceNames(); + var baselineNames = new List(); + foreach (var resource in resources) + { + var match = BaselineRegex().Match(resource); + if (match.Success) + { + 
baselineNames.Add(match.Groups[1].Value); + } + } + return baselineNames.ToArray(); + } + + private static readonly Lazy> _baselineNames = new(GetBaselineNames); + public static IReadOnlyList BaselineNames => _baselineNames.Value; + + public static string GetBaselineContent(string name) + { + var assembly = typeof(EmbeddedBaselines).Assembly; + var resourceName = TemplateResourceNamePrefix + name + TemplateResourceNameExt; + using var stream = assembly.GetManifestResourceStream(resourceName); + if (stream == null) + { + throw new InvalidOperationException($"Baseline '{name}' not found"); + } + using var reader = new StreamReader(stream); + return reader.ReadToEnd(); + } +} diff --git a/src/coreclr/tools/cdac-build-tool/JsonConverter/FieldModelJsonConverter.cs b/src/coreclr/tools/cdac-build-tool/JsonConverter/FieldModelJsonConverter.cs new file mode 100644 index 0000000000000..5b6631af52a0e --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/JsonConverter/FieldModelJsonConverter.cs @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +using System; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +namespace Microsoft.DotNet.Diagnostics.DataContract.JsonConverter; + +/// +/// Writes a FieldModel in the compact form of [offset, type] or just offset if type is null. +/// . 
+/// +public class FieldModelJsonConverter : JsonConverter +{ + public override DataDescriptorModel.FieldModel Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + throw new JsonException(); + } + + public override void Write(Utf8JsonWriter writer, DataDescriptorModel.FieldModel value, JsonSerializerOptions options) + { + // FieldBuilder initializes Type to string.Empty, so an unset type reaches here as "" rather than null; + // treat both as "no type" and emit the compact offset-only form instead of [offset, ""]. + if (string.IsNullOrEmpty(value.Type)) + { + writer.WriteNumberValue(value.Offset); + } + else + { + writer.WriteStartArray(); + writer.WriteNumberValue(value.Offset); + writer.WriteStringValue(value.Type); + writer.WriteEndArray(); + } + } +} diff --git a/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalModelJsonConverter.cs b/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalModelJsonConverter.cs new file mode 100644 index 0000000000000..cae54edb6ff10 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalModelJsonConverter.cs @@ -0,0 +1,32 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +using System; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +namespace Microsoft.DotNet.Diagnostics.DataContract.JsonConverter; +public class GlobalModelJsonConverter : JsonConverter +{ + public override DataDescriptorModel.GlobalModel Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + throw new JsonException(); + } + + public override void Write(Utf8JsonWriter writer, DataDescriptorModel.GlobalModel value, JsonSerializerOptions options) + { + if (value.Type is null) + { + // no type: just write 'value' or '[value]' + JsonSerializer.Serialize(writer, value.Value, options); + } + else + { + // there's a type. 
Write: [value, type] or [[value], type] + writer.WriteStartArray(); + JsonSerializer.Serialize(writer, value.Value, options); + writer.WriteStringValue(value.Type); + writer.WriteEndArray(); + } + } +} diff --git a/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalValueJsonConverter.cs b/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalValueJsonConverter.cs new file mode 100644 index 0000000000000..429f6cc697928 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/JsonConverter/GlobalValueJsonConverter.cs @@ -0,0 +1,32 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +using System; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +namespace Microsoft.DotNet.Diagnostics.DataContract.JsonConverter; +/// <summary> +/// Writes a GlobalValue in compact form: a direct value becomes a JSON string holding a hex number, +/// an indirect value becomes a 1-element array holding the index. Reading is not supported (Read throws). +/// </summary> +public class GlobalValueJsonConverter : JsonConverter +{ + public override DataDescriptorModel.GlobalValue Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + throw new JsonException(); + } + + public override void Write(Utf8JsonWriter writer, DataDescriptorModel.GlobalValue value, JsonSerializerOptions options) + { + if (!value.Indirect) + { + // direct value: not a JSON number - it is always written as a JSON string + // containing a lowercase hex number with a "0x" prefix + writer.WriteStringValue($"0x{value.Value:x}"); + } + else + { + // pointer data index. 
write as a 1-element array containing a decimal number + writer.WriteStartArray(); + writer.WriteNumberValue(value.Value); + writer.WriteEndArray(); + } + } +} diff --git a/src/coreclr/tools/cdac-build-tool/JsonConverter/TypeModelJsonConverter.cs b/src/coreclr/tools/cdac-build-tool/JsonConverter/TypeModelJsonConverter.cs new file mode 100644 index 0000000000000..bf400253d5ddf --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/JsonConverter/TypeModelJsonConverter.cs @@ -0,0 +1,32 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +using System; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +namespace Microsoft.DotNet.Diagnostics.DataContract.JsonConverter; +public class TypeModelJsonConverter : JsonConverter +{ + public const string SizePropertyname = "!"; + + public override DataDescriptorModel.TypeModel Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + throw new JsonException(); + } + + public override void Write(Utf8JsonWriter writer, DataDescriptorModel.TypeModel value, JsonSerializerOptions options) + { + writer.WriteStartObject(); + if (value.Size is not null) + { + writer.WriteNumber(SizePropertyname, value.Size.Value); + } + foreach (var (fieldName, field) in value.Fields) + { + writer.WritePropertyName(fieldName); + JsonSerializer.Serialize(writer, field, options); + } + writer.WriteEndObject(); + } +} diff --git a/src/coreclr/tools/cdac-build-tool/ObjectFileScraper.cs b/src/coreclr/tools/cdac-build-tool/ObjectFileScraper.cs new file mode 100644 index 0000000000000..42b0b004c8980 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/ObjectFileScraper.cs @@ -0,0 +1,519 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +public class ObjectFileScraper +{ + public static readonly ReadOnlyMemory MagicLE = new byte[8] { 0x44, 0x41, 0x43, 0x42, 0x4C, 0x4F, 0x42, 0x00 }; // "DACBLOB\0" + public static readonly ReadOnlyMemory MagicBE = new byte[8] { 0x00, 0x42, 0x4F, 0x4C, 0x42, 0x43, 0x41, 0x44 }; + + private readonly DataDescriptorModel.Builder _builder; + + public bool Verbose {get;} + public ObjectFileScraper(bool verbose, DataDescriptorModel.Builder builder) + { + Verbose = verbose; + _builder = builder; + } + + public async Task ScrapeInput(string inputPath, CancellationToken token) + { + var bytes = await File.ReadAllBytesAsync(inputPath, token).ConfigureAwait(false); + if (!ScraperState.CreateScraperState(bytes, out var state)) + { + return false; + } + if (Verbose) + { + Console.WriteLine($"Magic starts at 0x{state.MagicStart:x8} in {inputPath}"); + } + var header = ReadHeader(state); + if (Verbose) + { + DumpHeaderDirectory(header); + } + var content = ReadContent(state, header); + content.AddToModel(_builder); + if (Verbose) + { + Console.WriteLine($"\nFinished scraping content from {inputPath}"); + } + return true; + } + + private sealed class ScraperState + { + public ReadOnlyMemory Data { get; } + public bool LittleEndian { get; } + private long _position; + + // expect MagicLE and MagicBE to have the same length + public long MagicStart => HeaderStart - MagicLE.Length; + public long HeaderStart { get; } + + private ScraperState(ReadOnlyMemory data, bool isLittleEndian, long headerStart) + { + Data = data; + LittleEndian = isLittleEndian; + HeaderStart = headerStart; + _position = headerStart; + } + + public static bool CreateScraperState(ReadOnlyMemory bytes, [NotNullWhen(true)] out 
ScraperState? scraperState) + { + if (FindMagic(bytes.Span, out int offset, out bool isLittleEndian)) + { + scraperState = new ScraperState(bytes, isLittleEndian, offset + MagicLE.Length); + return true; + } + scraperState = null; + return false; + } + + private static bool FindMagic(ReadOnlySpan buffer, out int offset, out bool isLittleEndian) + { + int start = buffer.IndexOf(MagicLE.Span); + if (start != -1) + { + offset = start; + isLittleEndian = true; + return true; + } + start = buffer.IndexOf(MagicBE.Span); + if (start != -1) + { + offset = start; + isLittleEndian = false; + return true; + } + offset = 0; + isLittleEndian = false; + return false; + } + + public ulong GetUInt64(long offset) => LittleEndian ? BinaryPrimitives.ReadUInt64LittleEndian(Data.Span.Slice((int)offset)) : BinaryPrimitives.ReadUInt64BigEndian(Data.Span.Slice((int)offset)); + public uint GetUInt32(long offset) => LittleEndian ? BinaryPrimitives.ReadUInt32LittleEndian(Data.Span.Slice((int)offset)) : BinaryPrimitives.ReadUInt32BigEndian(Data.Span.Slice((int)offset)); + public ushort GetUInt16(long offset) => LittleEndian ? 
BinaryPrimitives.ReadUInt16LittleEndian(Data.Span.Slice((int)offset)) : BinaryPrimitives.ReadUInt16BigEndian(Data.Span.Slice((int)offset)); + public byte GetByte(long offset) => Data.Span[(int)offset]; + + public ReadOnlySpan GetBytes(long offset, int length) => Data.Span.Slice((int)offset, length); + + public void ResetPosition(long position) + { + _position = position; + } + + public ulong ReadUInt64() + { + var value = GetUInt64(_position); + _position += sizeof(ulong); + return value; + } + public uint ReadUInt32() + { + var value = GetUInt32(_position); + _position += sizeof(uint); + return value; + } + public ushort ReadUInt16() + { + var value = GetUInt16(_position); + _position += sizeof(ushort); + return value; + } + + public byte ReadByte() + { + var value = GetByte(_position); + _position += sizeof(byte); + return value; + } + public void ReadBytes(Span buffer) + { + GetBytes(_position, buffer.Length).CopyTo(buffer); + _position += buffer.Length; + } + + public void Skip(int count) + { + _position += count; + } + } + + // see typedef Directory in data-descriptor-blob.md + private struct HeaderDirectory + { + public uint FlagsAndBaselineStart; + public uint TypesStart; + + public uint FieldsPoolStart; + public uint GlobalLiteralValuesStart; + + public uint GlobalPointersStart; + public uint NamesStart; + + public uint TypesCount; + public uint FieldsPoolCount; + + public uint GlobalLiteralValuesCount; + public uint GlobalPointerValuesCount; + + public uint NamesPoolCount; + + public byte TypeSpecSize; + public byte FieldSpecSize; + public byte GlobalLiteralSpecSize; + public byte GlobalPointerSpecSize; + }; + + private static void DumpHeaderDirectory(HeaderDirectory headerDirectory) + { + Console.WriteLine($""" + Scaped Header Directory: + + Baseline Start = 0x{headerDirectory.FlagsAndBaselineStart:x8} + Types Start = 0x{headerDirectory.TypesStart:x8} + Fields Pool Start = 0x{headerDirectory.FieldsPoolStart:x8} + Global Literals Start = 
0x{headerDirectory.GlobalLiteralValuesStart:x8} + Global Pointers Start = 0x{headerDirectory.GlobalPointersStart:x8} + Names Pool Start = 0x{headerDirectory.NamesStart:x8} + + Types Count = {headerDirectory.TypesCount} + Fields Pool Count = {headerDirectory.FieldsPoolCount} + Global Literal Values Count = {headerDirectory.GlobalLiteralValuesCount} + Global Pointer Values Count = {headerDirectory.GlobalPointerValuesCount} + Names Pool Count = {headerDirectory.NamesPoolCount} + + """); + } + + private static HeaderDirectory ReadHeader(ScraperState state) + { + state.ResetPosition(state.HeaderStart); + var baselineStart = state.ReadUInt32(); + var typesStart = state.ReadUInt32(); + + var fieldPoolStart = state.ReadUInt32(); + var globalLiteralValuesStart = state.ReadUInt32(); + + var globalPointersStart = state.ReadUInt32(); + var namesStart = state.ReadUInt32(); + + var typeCount = state.ReadUInt32(); + var fieldPoolCount = state.ReadUInt32(); + + var globalLiteralValuesCount = state.ReadUInt32(); + var globalPointerValuesCount = state.ReadUInt32(); + + var namesPoolCount = state.ReadUInt32(); + + var typeSpecSize = state.ReadByte(); + var fieldSpecSize = state.ReadByte(); + var globalLiteralSpecSize = state.ReadByte(); + var globalPointerSpecSize = state.ReadByte(); + + return new HeaderDirectory { + FlagsAndBaselineStart = baselineStart, + TypesStart = typesStart, + FieldsPoolStart = fieldPoolStart, + GlobalLiteralValuesStart = globalLiteralValuesStart, + GlobalPointersStart = globalPointersStart, + NamesStart = namesStart, + + TypesCount = typeCount, + FieldsPoolCount = fieldPoolCount, + + GlobalLiteralValuesCount = globalLiteralValuesCount, + GlobalPointerValuesCount = globalPointerValuesCount, + + NamesPoolCount = namesPoolCount, + + TypeSpecSize = typeSpecSize, + FieldSpecSize = fieldSpecSize, + GlobalLiteralSpecSize = globalLiteralSpecSize, + GlobalPointerSpecSize = globalPointerSpecSize, + }; + } + + private struct TypeSpec + { + public uint NameIdx; + public 
uint FieldsIdx; + public ushort? Size; + } + + private struct FieldSpec + { + public uint NameIdx; + public uint TypeNameIdx; + public ushort FieldOffset; + } + + // Like a FieldSpec but with names resolved + private struct FieldEntry + { + public string Name; + public string Type; + public ushort Offset; + } + + private struct GlobalLiteralSpec + { + public uint NameIdx; + public uint TypeNameIdx; + public ulong Value; + } + + private struct GlobalPointerSpec + { + public uint NameIdx; + public uint AuxDataIdx; + } + + private sealed class Content + { + public required bool Verbose {get; init; } + public required uint PlatformFlags { get; init; } + public required uint Baseline { get; init; } + public required IReadOnlyList TypeSpecs { get; init; } + public required IReadOnlyList FieldSpecs { get; init; } + public required IReadOnlyList GlobaLiteralSpecs { get; init; } + public required IReadOnlyList GlobalPointerSpecs { get; init; } + public required ReadOnlyMemory NamesPool { get; init; } + + internal string GetPoolString(uint stringIdx) + { + var nameStart = NamesPool.Span.Slice((int)stringIdx); + var end = nameStart.IndexOf((byte)0); // find the first nul after index + if (end == -1) + throw new InvalidOperationException("expected a nul-terminated name"); + var nameBytes = nameStart.Slice(0, end); + return System.Text.Encoding.UTF8.GetString(nameBytes); + } + + public void AddToModel(DataDescriptorModel.Builder builder) + { + WriteVerbose("\nAdding scraped content to model"); + builder.PlatformFlags = PlatformFlags; + string baseline = GetPoolString(Baseline); + WriteVerbose($"Baseline Name = {baseline}"); + builder.SetBaseline(baseline); + + + FieldEntry[] fields = FieldSpecs.Select((fieldSpec) => + (fieldSpec.NameIdx != 0) ? 
+ new FieldEntry + { + Name = GetPoolString(fieldSpec.NameIdx), + Type = GetPoolString(fieldSpec.TypeNameIdx), + Offset = fieldSpec.FieldOffset + } : + default + ).ToArray(); + + foreach (var typeSpec in TypeSpecs) + { + string typeName = GetPoolString(typeSpec.NameIdx); + var typeBuilder = builder.AddOrUpdateType(typeName, typeSpec.Size); + uint j = typeSpec.FieldsIdx; // used directly as the starting index into fields (no byte-offset conversion happens here) + WriteVerbose($"Type {typeName} has fields starting at index {j}"); + // walk consecutive pool entries until the end of the pool or an entry whose name is empty + while (j < fields.Length && !string.IsNullOrEmpty(fields[j].Name)) + { + typeBuilder.AddOrUpdateField(fields[j].Name, fields[j].Type, fields[j].Offset); + WriteVerbose($"Type {typeName} has field {fields[j].Name} with offset {fields[j].Offset}"); + j++; + } + if (typeSpec.Size is not null) + { + WriteVerbose($"Type {typeName} has size {typeSpec.Size}"); + } + else + { + WriteVerbose($"Type {typeName} has indeterminate size"); + } + } + + // literal globals: name and type are looked up in the names pool, the value is stored as a direct GlobalValue + foreach (var globalSpec in GlobaLiteralSpecs) + { + var globalName = GetPoolString(globalSpec.NameIdx); + var globalType = GetPoolString(globalSpec.TypeNameIdx); + var globalValue = DataDescriptorModel.GlobalValue.MakeDirect(globalSpec.Value); + builder.AddOrUpdateGlobal(globalName, globalType, globalValue); + WriteVerbose($"Global {globalName} has type {globalType} with value {globalValue}"); + } + + // pointer globals: stored as an indirect GlobalValue holding AuxDataIdx and always typed as PointerTypeName + foreach (var globalPointer in GlobalPointerSpecs) + { + var globalName = GetPoolString(globalPointer.NameIdx); + var auxDataIdx = globalPointer.AuxDataIdx; + var globalValue = DataDescriptorModel.GlobalValue.MakeIndirect(auxDataIdx); + builder.AddOrUpdateGlobal(globalName, DataDescriptorModel.PointerTypeName, globalValue); + WriteVerbose($"Global pointer {globalName} has index {globalValue}"); + } + } + + private void WriteVerbose(string msg) + { + if (Verbose) + Console.WriteLine(msg); + } + } + + private Content ReadContent(ScraperState state, HeaderDirectory header) + { + WriteVerbose("\nReading scraped content"); + state.ResetPosition(state.HeaderStart + 
header.FlagsAndBaselineStart); + var platformFlags = state.ReadUInt32(); + var baselineNameIdx = state.ReadUInt32(); + WriteVerbose($"flags = 0x{platformFlags:x8}, baseline Name Idx = {baselineNameIdx}"); + + TypeSpec[] typeSpecs = ReadTypeSpecs(state, header); + FieldSpec[] fieldSpecs = ReadFieldSpecs(state, header); + GlobalLiteralSpec[] globalLiteralSpecs = ReadGlobalLiteralSpecs(state, header); + GlobalPointerSpec[] globalPointerSpecs = ReadGlobalPointerSpecs(state, header); + byte[] namesPool = ReadNamesPool(state, header); + + byte[] endMagic = new byte[4]; + state.ReadBytes(endMagic.AsSpan()); + if (!CheckEndMagic(endMagic)) + { + throw new InvalidOperationException($"expected endMagic, got 0x{endMagic[0]:x} 0x{endMagic[1]:x} 0x{endMagic[2]:x} 0x{endMagic[3]:x}"); + } + else + { + WriteVerbose("\nFound correct endMagic at end of content"); + } + return new Content + { + Verbose = Verbose, + PlatformFlags = platformFlags, + Baseline = baselineNameIdx, + TypeSpecs = typeSpecs, + FieldSpecs = fieldSpecs, + GlobaLiteralSpecs = globalLiteralSpecs, + GlobalPointerSpecs = globalPointerSpecs, + NamesPool = namesPool + }; + } + + private TypeSpec[] ReadTypeSpecs(ScraperState state, HeaderDirectory header) + { + TypeSpec[] typeSpecs = new TypeSpec[header.TypesCount]; + + state.ResetPosition(state.HeaderStart + (long)header.TypesStart); + for (int i = 0; i < header.TypesCount; i++) + { + int bytesRead = 0; + typeSpecs[i].NameIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + typeSpecs[i].FieldsIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + ushort size = state.ReadUInt16(); + bytesRead += sizeof(ushort); + if (size != 0) + { + typeSpecs[i].Size = size; + } + WriteVerbose($"TypeSpec[{i}]: NameIdx = {typeSpecs[i].NameIdx}, FieldsIdx = {typeSpecs[i].FieldsIdx}, Size = {typeSpecs[i].Size}"); + // skip padding + if (bytesRead < header.TypeSpecSize) + { + state.Skip(header.TypeSpecSize - bytesRead); + } + } + return typeSpecs; + } + + private static 
FieldSpec[] ReadFieldSpecs(ScraperState state, HeaderDirectory header) + { + state.ResetPosition(state.HeaderStart + (long)header.FieldsPoolStart); + FieldSpec[] fieldSpecs = new FieldSpec[header.FieldsPoolCount]; + for (int i = 0; i < header.FieldsPoolCount; i++) + { + int bytesRead = 0; + fieldSpecs[i].NameIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + fieldSpecs[i].TypeNameIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + fieldSpecs[i].FieldOffset = state.ReadUInt16(); + bytesRead += sizeof(ushort); + // skip padding + if (bytesRead < header.FieldSpecSize) + { + state.Skip(header.FieldSpecSize - bytesRead); + } + } + return fieldSpecs; + } + + private static GlobalLiteralSpec[] ReadGlobalLiteralSpecs(ScraperState state, HeaderDirectory header) + { + GlobalLiteralSpec[] globalSpecs = new GlobalLiteralSpec[header.GlobalLiteralValuesCount]; + state.ResetPosition(state.HeaderStart + (long)header.GlobalLiteralValuesStart); + for (int i = 0; i < header.GlobalLiteralValuesCount; i++) + { + int bytesRead = 0; + globalSpecs[i].NameIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + globalSpecs[i].TypeNameIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + globalSpecs[i].Value = state.ReadUInt64(); + bytesRead += sizeof(ulong); + // skip padding + if (bytesRead < header.GlobalLiteralSpecSize) + { + state.Skip(header.GlobalLiteralSpecSize - bytesRead); + } + } + return globalSpecs; + } + + private static GlobalPointerSpec[] ReadGlobalPointerSpecs(ScraperState state, HeaderDirectory header) + { + GlobalPointerSpec[] globalSpecs = new GlobalPointerSpec[header.GlobalPointerValuesCount]; + state.ResetPosition(state.HeaderStart + (long)header.GlobalPointersStart); + for (int i = 0; i < header.GlobalPointerValuesCount; i++) + { + int bytesRead = 0; + globalSpecs[i].NameIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + globalSpecs[i].AuxDataIdx = state.ReadUInt32(); + bytesRead += sizeof(uint); + // skip padding + if (bytesRead < 
header.GlobalPointerSpecSize) + { + state.Skip(header.GlobalPointerSpecSize - bytesRead); + } + } + return globalSpecs; + } + + private static byte[] ReadNamesPool(ScraperState state, HeaderDirectory header) + { + byte[] namesPool = new byte[header.NamesPoolCount]; + state.ResetPosition(state.HeaderStart + (long)header.NamesStart); + state.ReadBytes(namesPool.AsSpan()); + return namesPool; + } + + private static bool CheckEndMagic(ReadOnlySpan bytes) + { + return (bytes[0] == 0x01 && bytes[1] == 0x02 && bytes[2] == 0x03 && bytes[3] == 0x04); + } + + private void WriteVerbose(string msg) + { + if (Verbose) + Console.WriteLine(msg); + } +} diff --git a/src/coreclr/tools/cdac-build-tool/Program.cs b/src/coreclr/tools/cdac-build-tool/Program.cs new file mode 100644 index 0000000000000..132c13d30fa11 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/Program.cs @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.CommandLine; +using System.Threading.Tasks; + +namespace Microsoft.DotNet.Diagnostics.DataContract.BuildTool; + +public class Program +{ + public static async Task Main(string[] args) + { + CliRootCommand rootCommand = new (); + var verboseOption = new CliOption("-v", "--verbose") {Recursive = true, Description = "Verbose"}; + rootCommand.Add(verboseOption); + rootCommand.Add(new DiagramDirective()); + rootCommand.Add(new ComposeCommand(verboseOption)); + return await rootCommand.Parse(args).InvokeAsync().ConfigureAwait(true); + } +} diff --git a/src/coreclr/tools/cdac-build-tool/README.md b/src/coreclr/tools/cdac-build-tool/README.md new file mode 100644 index 0000000000000..512026a29860b --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/README.md @@ -0,0 +1,134 @@ +# cDAC Build Tool + +## Summary + +The purpose of `cdac-build-tool` is to generate a `.c` file that contains a JSON cDAC contract descriptor. 
+ +It works by processing one or more object files containing data descriptors and zero or more text +files that specify contracts. + +## Running + +```console +% cdac-build-tool compose [-v] -o contractdescriptor.c -c contracts.txt datadescriptor.o +``` +## .NET runtime build integration + +`cdac-build-tool` is meant to run as a CMake custom command. +It consumes a target platform object file and emits a C source +file that contains a JSON contract descriptor. The C source +is then included in the normal build and link steps to create the runtime. + +The contract descriptor source file depends on `contractpointerdata.c` which is a source file that contains +the definitions of the "indirect pointer data" that is referenced by the data descriptor. This is typically the addresses of important global variables in the runtime. +Constants and build flags are embedded directly in the JSON payload. + +Multiple data descriptor source files may be specified (for example if they are produced by different components of the runtime, or by different source languages). The final JSON payload will be a composition of all the data descriptors. + +Multiple contracts text files may be specified. This may be useful if some contracts are conditionally included (for example if they are platform-specific). The final JSON payload will be a composition of all the contracts files. + +In the C/C++ data descriptor, we use a single header file `datadescriptor.h` together with the C preprocessor to produce `datadescriptor.c` and `contractpointerdata.c`. +This is an implementation detail. For data structures defined in other languages, other tools can be used to produce the object file and indirect pointer data. 
+ +```mermaid +flowchart TB + headers("runtime headers") + data_header("datadescriptor.h") + data_src("datadescriptor.c") + compile_data["clang"] + data_obj("datadescriptor.o") + contracts("contracts.txt") + globals("contractpointerdata.c") + build[["cdac-build-tool"]] + descriptor_src("contractdescriptor.c") + vm("runtime sources") + compile_runtime["clang"] + runtime_lib(["libcoreclr.so"]) + + headers -.-> data_src + headers ~~~ data_header + data_header -.-> data_src + headers -.-> globals + headers -.-> vm + data_src --> compile_data --> data_obj --> build + contracts ---> build + build --> descriptor_src + descriptor_src --> compile_runtime + data_header -.-> globals ----> compile_runtime + vm ----> compile_runtime --> runtime_lib +``` + + +## Specifying data descriptors + +The sample in the `sample` dir uses the following syntax (see [sample/sample.data.h](sample/sample.data.h)) to specify the data descriptor: + +```c +CDAC_BASELINE("empty") +CDAC_TYPES_BEGIN() + +CDAC_TYPE_BEGIN(ManagedThread) +CDAC_TYPE_INDETERMINATE(ManagedThread) +CDAC_TYPE_FIELD(ManagedThread, GCHandle, GCHandle, offsetof(ManagedThread,m_gcHandle)) +CDAC_TYPE_FIELD(ManagedThread, pointer, Next, offsetof(ManagedThread,m_next)) +CDAC_TYPE_END(ManagedThread) + +CDAC_TYPE_BEGIN(GCHandle) +CDAC_TYPE_SIZE(sizeof(intptr_t)) +CDAC_TYPE_END(GCHandle) + +CDAC_TYPES_END() + +CDAC_GLOBALS_BEGIN() +CDAC_GLOBAL_POINTER(ManagedThreadStore, &g_managedThreadStore) +#if FEATURE_EH_FUNCLETS +CDAC_GLOBAL(FeatureEHFunclets, uint8, 1) +#else +CDAC_GLOBAL(FeatureEHFunclets, uint8, 0) +#endif +CDAC_GLOBAL(SomeMagicNumber, uint32, 42) +CDAC_GLOBALS_END() +``` + +The file is included multiple times with the macros variously defined in order to generate the +data descriptor blob. 
+ +## Implementation Details + +See [data-descriptor-blob.md](./data-descriptor-blob.md) + +## Workflow + +### Porting and extending the data blob scraper + +When porting to a new architecture, or extending the blob contents, it is recommended to +first work with the sample blob, rather than the full CoreCLR descriptor. + +For example, if your target platform has a clang toolchain, something like this will provide a suitable +input for `cdac-build-tool`: + +```console +$ clang -target wasm32-unknown-unknown -c -o /tmp/sample.o src/coreclr/tools/cdac-build-tool/sample/sample.blob.c +``` + +If you are modifying the preprocessor macros, using `-E` to emit the preprocessed output is helpful as well. + +```console +$ clang -target x86_64-unknown-linux-gnu -E -o /tmp/sample.i src/coreclr/tools/cdac-build-tool/sample/sample.blob.c +``` + +Running the `cdac-build-tool` with the `-v` verbose option will show progress: + +```console +$ ./dotnet.sh run --project src/coreclr/tools/cdac-build-tool/cdac-build-tool.csproj -- compose -v -o /tmp/contract.c /tmp/sample.o +``` + +It is also helpful to run the `cdac-build-tool` under a debugger with a breakpoint in `ObjectFileScraper.ScrapeInput`. + +**Release runtime builds** When building Release builds of the runtime, the build infrastructure +may turn on whole program optimizations. On some toolchains this may produce object files that +are a serialization of the internal compiler state, rather than a native object format. This may break +assumptions of the `cdac-build-tool` about global symbol initialization; for example, constants and string literals might not be stored as binary integers or as byte sequences. In such cases, it may be +necessary to turn off global optimizations when compiling `datadescriptor.cpp`. This is okay to do because `datadescriptor.cpp` is not shipped as part of the runtime build - and in fact it has no executable functions at all. It is just used to gather type layout and size information.
+ +It is conceivable that some future C/C++ compiler with whole program optimizations turned on may remove unused struct fields. (Such that separately compiling `datadescriptor.cpp` would produce incorrect offsets). In that case, `cdac-build-tool` will need to use another technique to collect offsets for a runtime built with such a compiler. As of 2024, no compilers do this, however. diff --git a/src/coreclr/tools/cdac-build-tool/Resources/contract-descriptor.c.in b/src/coreclr/tools/cdac-build-tool/Resources/contract-descriptor.c.in new file mode 100644 index 0000000000000..c1f0edd7a66f9 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/Resources/contract-descriptor.c.in @@ -0,0 +1,34 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +#ifdef _MSC_VER +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT __attribute__((visibility("default"))) +#endif + +struct DotNetRuntimeContractDescriptor +{ + uint64_t magic; + uint32_t flags; + const uint32_t descriptor_size; + const char *descriptor; + const uint32_t pointer_data_count; + uint32_t pad0; + const uintptr_t *pointer_data; +}; + +extern const uintptr_t contractDescriptorPointerData[]; + +DLLEXPORT struct DotNetRuntimeContractDescriptor DotNetRuntimeContractDescriptor; + +DLLEXPORT struct DotNetRuntimeContractDescriptor DotNetRuntimeContractDescriptor = { + .magic = 0x0043414443434e44ull, // "DNCCDAC\0" + .flags = %%platformFlags%%, + .descriptor_size = %%jsonDescriptorSize%%, + .descriptor = "%%jsonDescriptor%%", + .pointer_data_count = %%pointerDataCount%%, + .pointer_data = &contractDescriptorPointerData[0], +}; diff --git a/src/coreclr/tools/cdac-build-tool/cdac-build-tool.csproj b/src/coreclr/tools/cdac-build-tool/cdac-build-tool.csproj new file mode 100644 index 0000000000000..427a1eb4e9c91 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/cdac-build-tool.csproj @@ -0,0 +1,32 @@ + + + 
+ cdac-build-tool + Exe + enable + AnyCPU + $(NetCoreAppToolCurrent) + true + $(RuntimeBinDir)/cdac-build-tool + true + false + false + false + .NET runtime data contract build tool + Microsoft.DotNet.Diagnostics.DataContract + + + + + + + + Microsoft.DotNet.Diagnostics.DataContract.Baseline: + + + + + + + + diff --git a/src/coreclr/tools/cdac-build-tool/data-descriptor-blob.md b/src/coreclr/tools/cdac-build-tool/data-descriptor-blob.md new file mode 100644 index 0000000000000..b7321edd12c99 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/data-descriptor-blob.md @@ -0,0 +1,203 @@ +## On-disk target object binary blob descriptor + +### Summary + +This is an internal implementation detail allowing tooling to read target architecture structure sizes and offsets without understanding target architecture object formats. + +### Design requirements + +The design of the physical binary blob descriptor is constrained by the following requirements: +* The binary blob should be easy to process by examining an object file on disk - even if the object + file is for a foreign architecture/OS. It should be possible to read the binary blob purely by + looking at the bytes. Tooling should be able to analyze the blob without having to understand + relocation entries, DWARF debug info, symbols etc. +* It should be possible to produce the blob using the native C/C++/NativeAOT compiler for a given + target/architecture. In particular for a runtime written in C, the binary blob should be + constructible using C idioms. If the C compiler needs to pad or align the data, the blob format + should provide a way to iterate the blob contents without having to know anything about the target + platform ABI or C compiler conventions.
+* It should be possible to create separate subsets of the physical descriptor (in the target runtime + object format) using separate toolchains (for example: in NativeAOT some of the struct layouts may + be described by the NativeAOT compiler, while some might be described by the C/C++ toolchain) and + to run a build host (not target architecture) tool to read and compose them into a single physical + binary blob before embedding it into the final NativeAOT runtime binary. + +This leads to the following overall strategy for the design: +* The physical blob is "self-contained": indirections are encoded as offsets from the beginning of + the blob (or other base offsets), whereas using pointers would mean that the encoding of the blob + would have relocations applied to it, which would preclude reading the blob out of an object + file without understanding the object file format. +* The physical blob must be "self-describing": If the C compiler adds padding or alignment, the blob + descriptor must contain information for how to skip the padding/alignment data. +* The physical blob must be constructible using "lowest common denominator" target toolchain + tooling - the C preprocessor. That doesn't mean that tooling _must_ use the C preprocessor to + generate the blob, but the format must not exceed the capabilities of the C preprocessor. + + +### Blob + +Multi-byte values are in the target platform endianness. + +The blob's job is to encode descriptions of the .NET runtime's implementation types and their fields, +as well as globals. + +When encoding strings, we create a "string pool" in the data blob: a massive string literal +that concatenates all the names that we might need, separated by `"\0"` nul characters. To encode a name into another data structure, we write the offset of the name from the beginning of the string pool. We reserve the offset 0 to designate empty or invalid names.
+ +When encoding the fields of a type, we create a "field pool" in the data blob: a collection of field +descriptors delimited by an "empty field descriptor" (a field descriptor with a name index of 0). All +the fields for a single type are encoded as a contiguous run from a given field pool index until the next empty field descriptor. + +We're interested in encoding the following kinds of information: + +```c +// A type: +// We encode a data contract name and a collection of fields, and the size of the type. +struct TypeSpec +{ + uint32_t Name; + uint32_t Fields; + uint16_t Size; +}; + +// A field: +// We encode the field name, the type (or an empty name) and the offset of the field in the native +// struct. The size of the field is not part of the data descriptor. +struct FieldSpec +{ + uint32_t Name; + uint32_t TypeName; + uint16_t FieldOffset; +}; + +// A literal global value such as a constant, some flags bitmap, or the value of a preprocessor define: +// we record the name, an optional type name, and a value as an unsigned 64-bit value +struct GlobalLiteralSpec +{ + uint32_t Name; + uint32_t TypeName; + uint64_t Value; +}; + +// A global pointer value such as the address of some important data structure: +// We record the name and the index of the global in the auxiliary "pointer data" global which +// is compiled into the .NET runtime and contains the addresses of all the globals that are referenced +// from the data descriptor. +struct GlobalPointerSpec +{ + uint32_t Name; + uint32_t PointerDataIndex; +}; +``` + +The main data we want to emit to the object file is an instance of the following structure: + +```c +// The main payload of the object file. +struct BinaryBlobDataDescriptor +{ + // A directory giving the offsets of all the other content, + // the number of types, fields, global literals and pointers, and + // the sizes of the "Spec" structs, above, in order to account for any padding added + // by the C/C++ compiler.
+ struct Directory { + uint32_t FlagsAndBaselineStart; + uint32_t TypesStart; + + uint32_t FieldPoolStart; + uint32_t GlobalLiteralValuesStart; + + uint32_t GlobalPointersStart; + uint32_t NamesStart; + + uint32_t TypeCount; + uint32_t FieldPoolCount; + + uint32_t GlobalLiteralValuesCount; + uint32_t GlobalPointerValuesCount; + + uint32_t NamesPoolCount; + + uint8_t TypeSpecSize; + uint8_t FieldSpecSize; + uint8_t GlobalLiteralSpecSize; + uint8_t GlobalPointerSpecSize; + } Directory; + // Platform flags (primarily pointer size) + uint32_t PlatformFlags; + // a well-known name of the baseline data descriptor. the current descriptor + // records changes from this baseline. + uint32_t BaselineName; + // an array of type specs + struct TypeSpec Types[CDacBlobTypesCount]; + // all of the field specs - contiguous runs are all owned by the same type + struct FieldSpec FieldPool[CDacBlobFieldPoolCount]; + // an array of literal globals + struct GlobalLiteralSpec GlobalLiteralValues[CDacBlobGlobalLiteralsCount]; + // an array of pointer globals + struct GlobalPointerSpec GlobalPointerValues[CDacBlobGlobalPointersCount]; + // all of the names that might be referenced from elsewhere in BinaryBlobDataDescriptor, + // delimited by "\0" + uint8_t NamesPool[sizeof(struct CDacStringPoolSizes)]; + // an end magic value to validate that the name pool is of the expected length + uint8_t EndMagic[4]; // the bytes 0x01 0x02 0x03 0x04 +}; +``` + +Finally, the value that we write to the object file has this form: + +```c +struct MagicAndBlob { + // the magic value that we look for in the object file + // 0x00424F4C42434144ull - in little endian this is "DACBLOB\0" + uint64_t magic; + // the blob payload, described above + struct BinaryBlobDataDescriptor Blob; +}; +``` + +The `BinaryBlobDataDescriptor` begins with a directory that gives the relative offsets of the `PlatformFlags`, `Types`, `FieldPool`, +`GlobalLiteralValues`, `GlobalPointerValues` and `Names` fields of the blob. 
The number of elements of each of the arrays is +next. This is followed by the sizes of the spec structs. + +Rationale: If a `BinaryBlobDataDescriptor` is created via C macros, we want to embed the `offsetof` +and `sizeof` of the components of the blob into the blob itself without having to account for any +padding that the C compiler may introduce to enforce alignment. Additionally the `Directory` tries +to follow a common C alignment rule (we don't want padding introduced in the directory itself): +N-byte members are aligned to start on N-byte boundaries. + +The baseline is specified as an offset into the names pool. + +The types are given as an array of `TypeSpec` elements. Each one contains an offset into the +`NamesPool` giving the name of the type, an offset into the fields pool indicating the first +specified field of the type, and the size of the type in bytes or 0 if it is indeterminate. + +The fields pool is given as a sequence of `FieldSpec` elements. The fields for each type are given +in a contiguous subsequence and are terminated by a marker `FieldSpec` with a `Name` offset of 0. +(Thus if a type has an empty sequence of fields it just points to a marker field spec directly.) +For each field there is a name, given as an offset into the names pool, and the offset of the +field within its type. + +The global constants are given as a sequence of `GlobalLiteralSpec` elements. Each global has a +name, type and a value. Globals that are addresses in target memory are in `GlobalPointerSpec` +elements. Each pointer element has a name and an index in a separately compiled pointer structure +that is linked into the runtime. See +[contract-descriptor.md](/docs/design/datacontracts/contract-descriptor.md) + +The `NamesPool` is a single sequence of utf-8 bytes comprising the concatenation of all the type, +field and global names including a terminating nul byte for each name. The same name may occur +multiple times.
The names could be referenced by multiple types or multiple fields. (That is, a +clever blob emitter may pool strings.) The first name in the name pool is the empty string (with +its nul byte). + +Rationale: we want to reserve the offset 0 as a marker. + +Names are referenced by giving their offset from the beginning of the `NamesPool`. Each name +extends until the first nul byte encountered at or past the beginning of the name. + + +## Example + +An example C header describing some data types is given in [sample.data.h](./sample/sample.data.h). An +example series of C macro preprocessor definitions that produces a constant blob `Blob` is given in +[sample.blob.c](./sample/sample.blob.c) diff --git a/src/coreclr/tools/cdac-build-tool/sample/sample.blob.c b/src/coreclr/tools/cdac-build-tool/sample/sample.blob.c new file mode 100644 index 0000000000000..b90b7eca0e932 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/sample/sample.blob.c @@ -0,0 +1,553 @@ +#include +#include + +// example structures + +typedef struct ManagedThread ManagedThread; + +struct ManagedThread { + uint32_t garbage0; + uint32_t m_gcHandle; + uint32_t garbage1; + ManagedThread *m_next; +}; + +typedef struct ManagedThreadStore { + ManagedThread *threads; +} ManagedThreadStore; + +static ManagedThreadStore g_managedThreadStore; + +// end example structures + +// begin blob definition + +struct TypeSpec +{ + uint32_t Name; + uint32_t Fields; + uint16_t Size; +}; + +struct FieldSpec +{ + uint32_t Name; + uint32_t TypeName; + uint16_t FieldOffset; +}; + +struct GlobalLiteralSpec +{ + uint32_t Name; + uint32_t TypeName; + uint64_t Value; +}; + +struct GlobalPointerSpec +{ + uint32_t Name; + uint32_t AuxIndex; +}; + +#define CONCAT(token1,token2) token1 ## token2 +#define CONCAT4(token1, token2, token3, token4) token1 ## token2 ## token3 ## token4 + +#define MAKE_TYPELEN_NAME(tyname) CONCAT(cdac_string_pool_typename__, tyname) +#define MAKE_FIELDLEN_NAME(tyname,membername) 
CONCAT4(cdac_string_pool_membername__, tyname, __, membername) +#define MAKE_FIELDTYPELEN_NAME(tyname,membername) CONCAT4(cdac_string_pool_membertypename__, tyname, __, membername) +#define MAKE_GLOBALLEN_NAME(globalname) CONCAT(cdac_string_pool_globalname__, globalname) +#define MAKE_GLOBALTYPELEN_NAME(globalname) CONCAT(cdac_string_pool_globaltypename__, globalname) + +// define a struct where the size of each field is the length of some string. we will use offsetof to get +// the offset of each struct element, which will be equal to the offset of the beginning of that string in the +// string pool. +struct CDacStringPoolSizes +{ + char cdac_string_pool_nil; // make the first real string start at offset 1 +#define DECL_LEN(membername,len) char membername[(len)]; +#define CDAC_BASELINE(name) DECL_LEN(cdac_string_pool_baseline_, (sizeof(name))) +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) DECL_LEN(MAKE_TYPELEN_NAME(name), sizeof(#name)) +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) DECL_LEN(MAKE_FIELDLEN_NAME(tyname,membername), sizeof(#membername)) \ + DECL_LEN(MAKE_FIELDTYPELEN_NAME(tyname,membername), sizeof(#membertyname)) +#define CDAC_TYPE_END(name) +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) +#define CDAC_GLOBAL(name,tyname,value) DECL_LEN(MAKE_GLOBALLEN_NAME(name), sizeof(#name)) \ + DECL_LEN(MAKE_GLOBALTYPELEN_NAME(name), sizeof(#tyname)) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END +}; + +#define GET_TYPE_NAME(name) offsetof(struct 
CDacStringPoolSizes, MAKE_TYPELEN_NAME(name)) +#define GET_FIELD_NAME(tyname,membername) offsetof(struct CDacStringPoolSizes, MAKE_FIELDLEN_NAME(tyname,membername)) +#define GET_FIELDTYPE_NAME(tyname,membername) offsetof(struct CDacStringPoolSizes, MAKE_FIELDTYPELEN_NAME(tyname,membername)) +#define GET_GLOBAL_NAME(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALLEN_NAME(globalname)) +#define GET_GLOBALTYPE_NAME(globalname) offsetof(struct CDacStringPoolSizes, MAKE_GLOBALTYPELEN_NAME(globalname)) + +// count the types +enum +{ + CDacBlobTypesCount = +#define CDAC_BASELINE(name) 0 +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) + 1 +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) +#define CDAC_TYPE_END(name) +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) +#define CDAC_GLOBAL(name,tyname,value) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + , +}; + +// count the field pool size. 
+// there's 1 placeholder element at the start (from CDAC_BASELINE), 1 element per CDAC_TYPE_FIELD, and 1 endmarker after each type (from CDAC_TYPE_END) +enum +{ + CDacBlobFieldsPoolCount = +#define CDAC_BASELINE(name) 1 +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) + 1 +#define CDAC_TYPE_END(name) + 1 +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) +#define CDAC_GLOBAL(name,tyname,value) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + , +}; + +// count the literal globals: the sum starts at 0 (CDAC_BASELINE) and each CDAC_GLOBAL adds 1 +enum +{ + CDacBlobGlobalLiteralsCount = +#define CDAC_BASELINE(name) 0 +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) +#define CDAC_TYPE_END(name) +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) +#define CDAC_GLOBAL(name,tyname,value) + 1 +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + , +}; + +// count the pointer globals: the sum starts at 0 (CDAC_BASELINE) and each CDAC_GLOBAL_POINTER adds 1 +enum +{ + CDacBlobGlobalPointersCount = +#define CDAC_BASELINE(name) 0 +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define 
CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) +#define CDAC_TYPE_END(name) +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) + 1 +#define CDAC_GLOBAL(name,tyname,value) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + , +}; + + +#define MAKE_TYPEFIELDS_TYNAME(tyname) CONCAT(CDacFieldsPoolTypeStart__, tyname) + +// index of each run of fields. +// we make a struct containing one 1-byte member for each field in the run, and then take the offset of the +// nested per-type struct to get the index of the run of fields. +// this looks like +// +// struct CDacFieldsPoolSizes { +// char cdac_fields_pool_start_placeholder__; +// struct CDacFieldsPoolTypeStart__MethodTable { +// char cdac_fields_pool_member__MethodTable__GCHandle; +// char cdac_fields_pool_member__MethodTable_endmarker; +// } CDacFieldsPoolTypeStart__MethodTable; +// ... 
+// }; +// +// so that offsetof(struct CDacFieldsPoolSizes, CDacFieldsPoolTypeStart__MethodTable) will give the offset of the +// method table field descriptors in the run of fields. each type's run is closed by a 1-byte endmarker member named after the type +struct CDacFieldsPoolSizes +{ +#define DECL_LEN(membername) char membername; +#define CDAC_BASELINE(name) DECL_LEN(cdac_fields_pool_start_placeholder__) +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) struct MAKE_TYPEFIELDS_TYNAME(name) { +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) DECL_LEN(CONCAT4(cdac_fields_pool_member__, tyname, __, membername)) +#define CDAC_TYPE_END(name) DECL_LEN(CONCAT4(cdac_fields_pool_member__, name, _, endmarker)) \ + } MAKE_TYPEFIELDS_TYNAME(name); +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) +#define CDAC_GLOBAL(name,tyname,value) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END +#undef DECL_LEN +}; + +#define GET_TYPE_FIELDS(tyname) offsetof(struct CDacFieldsPoolSizes, MAKE_TYPEFIELDS_TYNAME(tyname)) + +// index of each global pointer +// +// struct CDacGlobalPointerIndex +// { +// char placeholder; +// char firstGlobalPointerName; +// char secondGlobalPointerName; +// ... 
+//} +// +// offsetof (struct CDacGlobalPointerIndex, cdac_global_pointer_index__NAME) returns the index of the global pointer NAME (members are one char each, so the offset is the member's ordinal; the CDAC_BASELINE placeholder is at offset 0) +struct CDacGlobalPointerIndex +{ +#define DECL_LEN(membername) char membername; +#define CDAC_BASELINE(name) DECL_LEN(cdac_global_pointer_index_start_placeholder__) +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) +#define CDAC_TYPE_END(name) +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) DECL_LEN(CONCAT(cdac_global_pointer_index__, name)) +#define CDAC_GLOBAL(name,tyname,value) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END +}; + +#define GET_GLOBAL_POINTER_INDEX(name) offsetof(struct CDacGlobalPointerIndex, CONCAT(cdac_global_pointer_index__, name)) + +struct BinaryBlobDataDescriptor +{ + // see data-descriptor-blob.md; every *Start member holds a byte offset from the beginning of BinaryBlobDataDescriptor + struct Directory { + uint32_t FlagsAndBaselineStart; + uint32_t TypesStart; + + uint32_t FieldsPoolStart; + uint32_t GlobalLiteralValuesStart; + + uint32_t GlobalPointersStart; + uint32_t NamesPoolStart; + + uint32_t TypeCount; + uint32_t FieldsPoolCount; + + uint32_t GlobalLiteralValuesCount; + uint32_t GlobalPointerValuesCount; + + uint32_t NamesPoolCount; + + uint8_t TypeSpecSize; + uint8_t FieldSpecSize; + uint8_t GlobalLiteralSpecSize; + uint8_t GlobalPointerSpecSize; + } Directory; + uint32_t PlatformFlags; + uint32_t BaselineName; + struct TypeSpec Types[CDacBlobTypesCount]; + struct FieldSpec FieldsPool[CDacBlobFieldsPoolCount]; + struct GlobalLiteralSpec GlobalLiteralValues[CDacBlobGlobalLiteralsCount]; + struct GlobalPointerSpec
GlobalPointerValues[CDacBlobGlobalPointersCount]; + uint8_t NamesPool[sizeof(struct CDacStringPoolSizes)]; + uint8_t EndMagic[4]; +}; + +struct MagicAndBlob { + uint64_t magic; + struct BinaryBlobDataDescriptor Blob; +}; + +const struct MagicAndBlob Blob = { + .magic = 0x00424F4C42434144ull,// ASCII "DACBLOB" plus a trailing nul when stored little-endian + .Blob = { + .Directory = { + .FlagsAndBaselineStart = offsetof(struct BinaryBlobDataDescriptor, PlatformFlags), + .TypesStart = offsetof(struct BinaryBlobDataDescriptor, Types), + .FieldsPoolStart = offsetof(struct BinaryBlobDataDescriptor, FieldsPool), + .GlobalLiteralValuesStart = offsetof(struct BinaryBlobDataDescriptor, GlobalLiteralValues), + .GlobalPointersStart = offsetof(struct BinaryBlobDataDescriptor, GlobalPointerValues), + .NamesPoolStart = offsetof(struct BinaryBlobDataDescriptor, NamesPool), + .TypeCount = CDacBlobTypesCount, + .FieldsPoolCount = CDacBlobFieldsPoolCount, + .GlobalLiteralValuesCount = CDacBlobGlobalLiteralsCount, + .GlobalPointerValuesCount = CDacBlobGlobalPointersCount, + .NamesPoolCount = sizeof(struct CDacStringPoolSizes), + .TypeSpecSize = sizeof(struct TypeSpec), + .FieldSpecSize = sizeof(struct FieldSpec), + .GlobalLiteralSpecSize = sizeof(struct GlobalLiteralSpec), + .GlobalPointerSpecSize = sizeof(struct GlobalPointerSpec), + }, + .EndMagic = { 0x01, 0x02, 0x03, 0x04 }, + .PlatformFlags = 0x01 | (sizeof(void*) == 4 ?
0x02 : 0), /* bit 0x02 is set when pointers are 4 bytes wide */ + .BaselineName = offsetof(struct CDacStringPoolSizes, cdac_string_pool_baseline_), + + .NamesPool = ("\0" // starts with a nul, so pool offset 0 is the empty string +#define CDAC_BASELINE(name) name "\0" +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) #name "\0" +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) #membername "\0" #membertyname "\0" +#define CDAC_TYPE_END(name) +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) #name "\0" +#define CDAC_GLOBAL(name,tyname,value) #name "\0" #tyname "\0" +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + ), + + .FieldsPool = { +#define CDAC_BASELINE(name) {0,}, +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) { \ + .Name = GET_FIELD_NAME(tyname,membername), \ + .TypeName = GET_FIELDTYPE_NAME(tyname,membername), \ + .FieldOffset = offset, \ +}, +#define CDAC_TYPE_END(name) { 0, }, +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) +#define CDAC_GLOBAL(name,tyname,value) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + }, + + .Types = { +#define CDAC_BASELINE(name) +#define CDAC_TYPES_BEGIN() +#define
CDAC_TYPE_BEGIN(name) { \ + .Name = GET_TYPE_NAME(name), \ + .Fields = GET_TYPE_FIELDS(name), +#define CDAC_TYPE_INDETERMINATE(name) .Size = 0, +#define CDAC_TYPE_SIZE(size) .Size = size, +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) +#define CDAC_TYPE_END(name) }, +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) +#define CDAC_GLOBAL(name,tyname,value) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + }, + + .GlobalLiteralValues = { +#define CDAC_BASELINE(name) +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) +#define CDAC_TYPE_END(name) +#define CDAC_TYPES_END() +#define CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) +#define CDAC_GLOBAL(name,tyname,value) { .Name = GET_GLOBAL_NAME(name), .TypeName = GET_GLOBALTYPE_NAME(name), .Value = value }, +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + }, + + .GlobalPointerValues = { +#define CDAC_BASELINE(name) +#define CDAC_TYPES_BEGIN() +#define CDAC_TYPE_BEGIN(name) +#define CDAC_TYPE_INDETERMINATE(name) +#define CDAC_TYPE_SIZE(size) +#define CDAC_TYPE_FIELD(tyname,membertyname,membername,offset) +#define CDAC_TYPE_END(name) +#define CDAC_TYPES_END() +#define
CDAC_GLOBALS_BEGIN() +#define CDAC_GLOBAL_POINTER(name,value) { .Name = GET_GLOBAL_NAME(name), .AuxIndex = GET_GLOBAL_POINTER_INDEX(name) }, +#define CDAC_GLOBAL(name,tyname,value) +#define CDAC_GLOBALS_END() +#include "sample.data.h" +#undef CDAC_BASELINE +#undef CDAC_TYPES_BEGIN +#undef CDAC_TYPES_END +#undef CDAC_TYPE_BEGIN +#undef CDAC_TYPE_INDETERMINATE +#undef CDAC_TYPE_SIZE +#undef CDAC_TYPE_FIELD +#undef CDAC_TYPE_END +#undef DECL_LEN +#undef CDAC_GLOBALS_BEGIN +#undef CDAC_GLOBAL_POINTER +#undef CDAC_GLOBAL +#undef CDAC_GLOBALS_END + }, + } +}; + +// end blob definition diff --git a/src/coreclr/tools/cdac-build-tool/sample/sample.data.h b/src/coreclr/tools/cdac-build-tool/sample/sample.data.h new file mode 100644 index 0000000000000..e4b8bff98b5e4 --- /dev/null +++ b/src/coreclr/tools/cdac-build-tool/sample/sample.data.h @@ -0,0 +1,24 @@ +CDAC_BASELINE("empty") +CDAC_TYPES_BEGIN() + +CDAC_TYPE_BEGIN(ManagedThread) +CDAC_TYPE_INDETERMINATE(ManagedThread) +CDAC_TYPE_FIELD(ManagedThread, GCHandle, GCHandle, offsetof(ManagedThread,m_gcHandle)) +CDAC_TYPE_FIELD(ManagedThread, pointer, Next, offsetof(ManagedThread,m_next)) +CDAC_TYPE_END(ManagedThread) + +CDAC_TYPE_BEGIN(GCHandle) +CDAC_TYPE_SIZE(sizeof(intptr_t)) +CDAC_TYPE_END(GCHandle) + +CDAC_TYPES_END() + +CDAC_GLOBALS_BEGIN() +CDAC_GLOBAL_POINTER(ManagedThreadStore, &g_managedThreadStore) +#if FEATURE_EH_FUNCLETS +CDAC_GLOBAL(FeatureEHFunclets, uint8, 1) +#else +CDAC_GLOBAL(FeatureEHFunclets, uint8, 0) +#endif +CDAC_GLOBAL(SomeMagicNumber, uint32, 42) +CDAC_GLOBALS_END() diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 345d5ac35f00e..220e2684a0d7a 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -391,6 +391,7 @@ set(VM_HEADERS_WKS callhelpers.h callsiteinspect.h callconvbuilder.hpp + cdacoffsets.h ceemain.h clrconfignative.h clrex.h diff --git a/src/coreclr/vm/cdacoffsets.h b/src/coreclr/vm/cdacoffsets.h new file mode 100644 index
0000000000000..317ef41f73603 --- /dev/null +++ b/src/coreclr/vm/cdacoffsets.h @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef CDACOFFSETS_H__ +#define CDACOFFSETS_H__ + +// See data-descriptor.h +// +// If the offset of some field F in class C must be provided to cDAC, but the field is private, the +// class C should declare every cdac_offsets<T> as a friend (inside the class body): +// +// template<typename T> friend struct ::cdac_offsets; +// +// and provide a specialization cdac_offsets<C> with a constexpr size_t member providing the offset: +// +// template<> struct cdac_offsets<C> { +// static constexpr size_t F_Offset = offsetof(C, F); +// }; +template<typename T> +struct cdac_offsets +{ +}; + +#endif // CDACOFFSETS_H__ diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 4241893ee522b..67c4b6b83c975 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -121,6 +121,7 @@ #include "gchandleutilities.h" #include "gcinfotypes.h" #include +#include "cdacoffsets.h" class Thread; class ThreadStore; @@ -4076,8 +4077,17 @@ class Thread private: bool m_hasPendingActivation; + + template<typename T> friend struct ::cdac_offsets; }; +template<> +struct cdac_offsets<Thread> +{ + static constexpr size_t ExposedObject = offsetof(Thread, m_ExposedObject); + static constexpr size_t Link = offsetof(Thread, m_Link); +}; + // End of class Thread typedef Thread::ForbidSuspendThreadHolder ForbidSuspendThreadHolder; diff --git a/src/native/corehost/apphost/static/singlefilehost.def b/src/native/corehost/apphost/static/singlefilehost.def index e1208056b8320..9d193783f950e 100644 --- a/src/native/corehost/apphost/static/singlefilehost.def +++ b/src/native/corehost/apphost/static/singlefilehost.def @@ -18,3 +18,6 @@ g_dacTable = s_dacGlobals ; Used by profilers MetaDataGetDispenser + +; cDAC contract descriptor +DotNetRuntimeContractDescriptor diff --git
a/src/native/corehost/apphost/static/singlefilehost_freebsdexports.src b/src/native/corehost/apphost/static/singlefilehost_freebsdexports.src index 1f9c517821855..da5cab866d93f 100644 --- a/src/native/corehost/apphost/static/singlefilehost_freebsdexports.src +++ b/src/native/corehost/apphost/static/singlefilehost_freebsdexports.src @@ -10,6 +10,9 @@ g_dacTable ; Used by profilers MetaDataGetDispenser +; cDAC contract descriptor +DotNetRuntimeContractDescriptor + ; FreeBSD needs to reexport these __progname environ diff --git a/src/native/corehost/apphost/static/singlefilehost_unixexports.src b/src/native/corehost/apphost/static/singlefilehost_unixexports.src index 18d5697e84580..23c60f6b8b162 100644 --- a/src/native/corehost/apphost/static/singlefilehost_unixexports.src +++ b/src/native/corehost/apphost/static/singlefilehost_unixexports.src @@ -9,3 +9,6 @@ g_dacTable ; Used by profilers MetaDataGetDispenser + +; cDAC contract descriptor +DotNetRuntimeContractDescriptor