From 3c295eccf9d9793559f8e628c241bb94609afc3d Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Tue, 12 Jul 2022 09:29:19 -0700 Subject: [PATCH] clean up the mini descriptor code: - Correctly set the modifier field for MessageDefs - Add error handling - Respect the provided arena, stop hardwiring the global alloc - upb_MiniDescriptor_EncodeExtension() is now upb_MiniDescriptor_EncodeField() - Make the plugin code a lot easier to read PiperOrigin-RevId: 460482002 --- upb/mini_descriptor.c | 131 ++++++++++++++++------------------ upb/mini_descriptor.h | 17 +++-- upbc/code_generator_request.c | 125 ++++++++++++++++---------------- 3 files changed, 132 insertions(+), 141 deletions(-) diff --git a/upb/mini_descriptor.c b/upb/mini_descriptor.c index 21383704d2..f7a1d81b31 100644 --- a/upb/mini_descriptor.c +++ b/upb/mini_descriptor.c @@ -27,9 +27,6 @@ #include "upb/mini_descriptor.h" -#include -#include - #include "upb/mini_table.h" // Must be last. @@ -75,14 +72,10 @@ static bool upb_DescState_Grow(DescState* d, upb_Arena* a) { return true; } -static void upb_DescState_Emit(const DescState* d, upb_StringView* str) { - *str = upb_StringView_FromDataAndSize(d->buf, d->ptr - d->buf); -} - /******************************************************************************/ // Copied from upbc/protoc-gen-upb.cc TODO(salo): can we consolidate? -static uint64_t upb_Field_Modifier(const upb_FieldDef* f) { +static uint64_t upb_Field_Modifiers(const upb_FieldDef* f) { uint64_t out = 0; if (upb_FieldDef_IsRepeated(f)) { out |= kUpb_FieldModifier_IsRepeated; @@ -105,6 +98,19 @@ static uint64_t upb_Field_Modifier(const upb_FieldDef* f) { return out; } +static uint64_t upb_Message_Modifiers(const upb_MessageDef* m) { + uint64_t out = 0; + const upb_FileDef* file_def = upb_MessageDef_File(m); + if (upb_FileDef_Syntax(file_def) == kUpb_Syntax_Proto3) { + out |= kUpb_MessageModifier_ValidateUtf8; + out |= kUpb_MessageModifier_DefaultIsPacked; + } + if (upb_MessageDef_ExtensionRangeCount(m)) { + out |= kUpb_MessageModifier_IsExtendable; + } + return out; +} + /******************************************************************************/ // Sort by enum value. @@ -129,127 +135,112 @@ static int upb_MiniDescriptor_CompareFields(const void* a, const void* b) { return 0; } -upb_StringView upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* enum_def, - upb_Arena* a) { - upb_StringView out; - out.data = NULL; - out.size = 0; - +bool upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, char** data, + size_t* size, upb_Arena* a) { DescState s; upb_DescState_Init(&s); // Copy and sort. - const size_t len = upb_EnumDef_ValueCount(enum_def); - const upb_EnumValueDef** sorted = upb_gmalloc(len * sizeof(void*)); - if (!sorted) goto err; + const size_t len = upb_EnumDef_ValueCount(e); + const upb_EnumValueDef** sorted = + (const upb_EnumValueDef**)upb_Arena_Malloc(a, len * sizeof(void*)); + if (!sorted) return false; for (size_t i = 0; i < len; i++) { - sorted[i] = upb_EnumDef_Value(enum_def, i); + sorted[i] = upb_EnumDef_Value(e, i); } qsort(sorted, len, sizeof(void*), upb_MiniDescriptor_CompareEnums); upb_MtDataEncoder_StartEnum(&s.e); for (size_t i = 0; i < len; i++) { - if (!upb_DescState_Grow(&s, a)) goto err; + if (!upb_DescState_Grow(&s, a)) return false; const upb_EnumValueDef* value_def = sorted[i]; const int number = upb_EnumValueDef_Number(value_def); s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, number); - UPB_ASSERT(s.ptr); } - if (!upb_DescState_Grow(&s, a)) goto err; + if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr); - UPB_ASSERT(s.ptr); - - upb_DescState_Emit(&s, &out); -err: - if (sorted) upb_gfree(sorted); - return out; + upb_gfree(sorted); + *data = s.buf; + *size = s.ptr - s.buf; + return true; } -upb_StringView upb_MiniDescriptor_EncodeExtension(const upb_FieldDef* field_def, - upb_Arena* a) { - upb_StringView out; - out.data = NULL; - out.size = 0; - +bool upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, char** data, + size_t* size, upb_Arena* a) { DescState s; upb_DescState_Init(&s); - if (!upb_DescState_Grow(&s, a)) goto err; + if (!upb_DescState_Grow(&s, a)) return false; upb_MtDataEncoder_StartMessage(&s.e, s.ptr, 0); - UPB_ASSERT(upb_FieldDef_IsExtension(field_def)); - const upb_FieldType type = upb_FieldDef_Type(field_def); - const int number = upb_FieldDef_Number(field_def); - const uint64_t modifier = upb_Field_Modifier(field_def); - upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifier); + UPB_ASSERT(upb_FieldDef_IsExtension(f)); + const upb_FieldType type = upb_FieldDef_Type(f); + const int number = upb_FieldDef_Number(f); + const uint64_t modifiers = upb_Field_Modifiers(f); - upb_DescState_Emit(&s, &out); + if (!upb_DescState_Grow(&s, a)) return false; + upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers); -err: - return out; + *data = s.buf; + *size = s.ptr - s.buf; + return true; } -upb_StringView upb_MiniDescriptor_EncodeMessage( - const upb_MessageDef* message_def, upb_Arena* a) { - upb_StringView out; - out.data = NULL; - out.size = 0; - +bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, char** data, + size_t* size, upb_Arena* a) { DescState s; upb_DescState_Init(&s); // Make a copy. - const size_t len = upb_MessageDef_FieldCount(message_def); - const upb_FieldDef** sorted = upb_gmalloc(len * sizeof(void*)); - if (!sorted) goto err; + const size_t len = upb_MessageDef_FieldCount(m); + const upb_FieldDef** sorted = + (const upb_FieldDef**)upb_Arena_Malloc(a, len * sizeof(void*)); + if (!sorted) return false; // Sort the copy. for (size_t i = 0; i < len; i++) { - sorted[i] = upb_MessageDef_Field(message_def, i); + sorted[i] = upb_MessageDef_Field(m, i); } qsort(sorted, len, sizeof(void*), upb_MiniDescriptor_CompareFields); - if (!upb_DescState_Grow(&s, a)) goto err; - upb_MtDataEncoder_StartMessage(&s.e, s.ptr, 0); + // Start encoding. + if (!upb_DescState_Grow(&s, a)) return false; + upb_MtDataEncoder_StartMessage(&s.e, s.ptr, upb_Message_Modifiers(m)); // Encode the fields. for (size_t i = 0; i < len; i++) { const upb_FieldDef* field_def = sorted[i]; - const int number = upb_FieldDef_Number(field_def); const upb_FieldType type = upb_FieldDef_Type(field_def); - const uint64_t modifier = upb_Field_Modifier(field_def); + const int number = upb_FieldDef_Number(field_def); + const uint64_t modifiers = upb_Field_Modifiers(field_def); - if (!upb_DescState_Grow(&s, a)) goto err; - s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifier); - UPB_ASSERT(s.ptr); + if (!upb_DescState_Grow(&s, a)) return false; + s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers); } // Encode the oneofs. - const int oneof_count = upb_MessageDef_OneofCount(message_def); + const int oneof_count = upb_MessageDef_OneofCount(m); for (int i = 0; i < oneof_count; i++) { - if (!upb_DescState_Grow(&s, a)) goto err; + if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_StartOneof(&s.e, s.ptr); - UPB_ASSERT(s.ptr); - const upb_OneofDef* oneof_def = upb_MessageDef_Oneof(message_def, i); + const upb_OneofDef* oneof_def = upb_MessageDef_Oneof(m, i); const int field_count = upb_OneofDef_FieldCount(oneof_def); for (int j = 0; j < field_count; j++) { const upb_FieldDef* field_def = upb_OneofDef_Field(oneof_def, j); const int number = upb_FieldDef_Number(field_def); - if (!upb_DescState_Grow(&s, a)) goto err; + if (!upb_DescState_Grow(&s, a)) return false; s.ptr = upb_MtDataEncoder_PutOneofField(&s.e, s.ptr, number); - UPB_ASSERT(s.ptr); } } - upb_DescState_Emit(&s, &out); - -err: - if (sorted) upb_gfree(sorted); - return out; + upb_gfree(sorted); + *data = s.buf; + *size = s.ptr - s.buf; + return true; } diff --git a/upb/mini_descriptor.h b/upb/mini_descriptor.h index 829d008048..73fbae3b29 100644 --- a/upb/mini_descriptor.h +++ b/upb/mini_descriptor.h @@ -28,8 +28,8 @@ #ifndef UPB_MINI_DESCRIPTOR_H_ #define UPB_MINI_DESCRIPTOR_H_ +#include "upb/arena.h" #include "upb/def.h" -#include "upb/upb.h" // Must be last. #include "upb/port_def.inc" @@ -40,14 +40,17 @@ extern "C" { /** upb_MiniDescriptor ********************************************************/ -upb_StringView upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* enum_def, - upb_Arena* a); +// All of these functions return true on success, false on failure. +// Failure always means an OOM error. -upb_StringView upb_MiniDescriptor_EncodeExtension(const upb_FieldDef* field_def, - upb_Arena* a); +bool upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, char** data, + size_t* size, upb_Arena* a); -upb_StringView upb_MiniDescriptor_EncodeMessage( - const upb_MessageDef* message_def, upb_Arena* a); +bool upb_MiniDescriptor_EncodeField(const upb_FieldDef* f, char** data, + size_t* size, upb_Arena* a); + +bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, char** data, + size_t* size, upb_Arena* a); #ifdef __cplusplus } /* extern "C" */ diff --git a/upbc/code_generator_request.c b/upbc/code_generator_request.c index 2591690f15..1ac0c4e683 100644 --- a/upbc/code_generator_request.c +++ b/upbc/code_generator_request.c @@ -28,7 +28,6 @@ #include "upbc/code_generator_request.h" #include -#include #include #include "google/protobuf/compiler/plugin.upb.h" @@ -56,7 +55,8 @@ static void upbc_State_Fini(upbc_State* s) { if (s->symtab) upb_DefPool_Free(s->symtab); } -static void upbc_Error(upbc_State* s, const char* fn, const char* msg) { +UPB_NORETURN static void upbc_Error(upbc_State* s, const char* fn, + const char* msg) { upb_Status_SetErrorFormat(s->status, "%s(): %s", fn, msg); upbc_State_Fini(s); UPB_LONGJMP(s->jmp, -1); @@ -70,9 +70,10 @@ static void upbc_State_Init(upbc_State* s) { if (!s->out) upbc_Error(s, __func__, "could not allocate request"); } -static void upbc_State_Emit(upbc_State* s, const char* name, - upb_StringView encoding) { +static void upbc_State_Emit(upbc_State* s, const char* name, const char* data, + size_t size) { const upb_StringView key = upb_StringView_FromString(name); + const upb_StringView encoding = upb_StringView_FromDataAndSize(data, size); bool ok = upbc_CodeGeneratorRequest_mini_descriptors_set(s->out, key, encoding, s->arena); if (!ok) upbc_Error(s, __func__, "could not set mini descriptor in map"); @@ -83,52 +84,52 @@ static void upbc_State_Emit(upbc_State* s, const char* name, // Forward declaration. static void upbc_Scrape_Message(upbc_State*, const upb_MessageDef*); -static void upbc_Scrape_Enum(upbc_State* s, const upb_EnumDef* enum_def) { - const char* name = upb_EnumDef_FullName(enum_def); - const upb_StringView encoding = - upb_MiniDescriptor_EncodeEnum(enum_def, s->arena); - upbc_State_Emit(s, name, encoding); +static void upbc_Scrape_Enum(upbc_State* s, const upb_EnumDef* e) { + char* data; + size_t size; + bool ok = upb_MiniDescriptor_EncodeEnum(e, &data, &size, s->arena); + if (!ok) upbc_Error(s, __func__, "could not encode enum"); + + upbc_State_Emit(s, upb_EnumDef_FullName(e), data, size); } -static void upbc_Scrape_Extension(upbc_State* s, - const upb_FieldDef* field_def) { - const char* name = upb_FieldDef_FullName(field_def); - const upb_StringView encoding = - upb_MiniDescriptor_EncodeExtension(field_def, s->arena); - upbc_State_Emit(s, name, encoding); +static void upbc_Scrape_Extension(upbc_State* s, const upb_FieldDef* f) { + char* data; + size_t size; + bool ok = upb_MiniDescriptor_EncodeField(f, &data, &size, s->arena); + if (!ok) upbc_Error(s, __func__, "could not encode extension"); + + upbc_State_Emit(s, upb_FieldDef_FullName(f), data, size); } -static void upbc_Scrape_FileEnums(upbc_State* s, const upb_FileDef* file_def) { - const size_t len = upb_FileDef_TopLevelEnumCount(file_def); +static void upbc_Scrape_FileEnums(upbc_State* s, const upb_FileDef* f) { + const size_t len = upb_FileDef_TopLevelEnumCount(f); + for (size_t i = 0; i < len; i++) { - const upb_EnumDef* enum_def = upb_FileDef_TopLevelEnum(file_def, i); - upbc_Scrape_Enum(s, enum_def); + upbc_Scrape_Enum(s, upb_FileDef_TopLevelEnum(f, i)); } } -static void upbc_Scrape_FileExtensions(upbc_State* s, - const upb_FileDef* file_def) { - const size_t len = upb_FileDef_TopLevelExtensionCount(file_def); +static void upbc_Scrape_FileExtensions(upbc_State* s, const upb_FileDef* f) { + const size_t len = upb_FileDef_TopLevelExtensionCount(f); + for (size_t i = 0; i < len; i++) { - const upb_FieldDef* field_def = upb_FileDef_TopLevelExtension(file_def, i); - upbc_Scrape_Extension(s, field_def); + upbc_Scrape_Extension(s, upb_FileDef_TopLevelExtension(f, i)); } } -static void upbc_Scrape_FileMessages(upbc_State* s, - const upb_FileDef* file_def) { - const size_t len = upb_FileDef_TopLevelMessageCount(file_def); +static void upbc_Scrape_FileMessages(upbc_State* s, const upb_FileDef* f) { + const size_t len = upb_FileDef_TopLevelMessageCount(f); + for (size_t i = 0; i < len; i++) { - const upb_MessageDef* message_def = - upb_FileDef_TopLevelMessage(file_def, i); - upbc_Scrape_Message(s, message_def); + upbc_Scrape_Message(s, upb_FileDef_TopLevelMessage(f, i)); } } -static void upbc_Scrape_File(upbc_State* s, const upb_FileDef* file_def) { - upbc_Scrape_FileEnums(s, file_def); - upbc_Scrape_FileExtensions(s, file_def); - upbc_Scrape_FileMessages(s, file_def); +static void upbc_Scrape_File(upbc_State* s, const upb_FileDef* f) { + upbc_Scrape_FileEnums(s, f); + upbc_Scrape_FileExtensions(s, f); + upbc_Scrape_FileMessages(s, f); } static void upbc_Scrape_Files(upbc_State* s) { @@ -136,57 +137,53 @@ static void upbc_Scrape_Files(upbc_State* s) { upbc_CodeGeneratorRequest_request(s->out); size_t len = 0; - const google_protobuf_FileDescriptorProto* const* file_types = + const google_protobuf_FileDescriptorProto* const* files = google_protobuf_compiler_CodeGeneratorRequest_proto_file(request, &len); for (size_t i = 0; i < len; i++) { - const upb_FileDef* file_def = - upb_DefPool_AddFile(s->symtab, file_types[i], s->status); - if (!file_def) upbc_Error(s, __func__, "could not add file to def pool"); + const upb_FileDef* f = upb_DefPool_AddFile(s->symtab, files[i], s->status); + if (!f) upbc_Error(s, __func__, "could not add file to def pool"); - upbc_Scrape_File(s, file_def); + upbc_Scrape_File(s, f); } } -static void upbc_Scrape_NestedEnums(upbc_State* s, - const upb_MessageDef* message_def) { - const size_t len = upb_MessageDef_NestedEnumCount(message_def); +static void upbc_Scrape_NestedEnums(upbc_State* s, const upb_MessageDef* m) { + const size_t len = upb_MessageDef_NestedEnumCount(m); + for (size_t i = 0; i < len; i++) { - const upb_EnumDef* enum_def = upb_MessageDef_NestedEnum(message_def, i); - upbc_Scrape_Enum(s, enum_def); + upbc_Scrape_Enum(s, upb_MessageDef_NestedEnum(m, i)); } } static void upbc_Scrape_NestedExtensions(upbc_State* s, - const upb_MessageDef* message_def) { - const size_t len = upb_MessageDef_NestedExtensionCount(message_def); + const upb_MessageDef* m) { + const size_t len = upb_MessageDef_NestedExtensionCount(m); + for (size_t i = 0; i < len; i++) { - const upb_FieldDef* field_def = - upb_MessageDef_NestedExtension(message_def, i); - upbc_Scrape_Extension(s, field_def); + upbc_Scrape_Extension(s, upb_MessageDef_NestedExtension(m, i)); } } -static void upbc_Scrape_NestedMessages(upbc_State* s, - const upb_MessageDef* message_def) { - const size_t len = upb_MessageDef_NestedMessageCount(message_def); +static void upbc_Scrape_NestedMessages(upbc_State* s, const upb_MessageDef* m) { + const size_t len = upb_MessageDef_NestedMessageCount(m); + for (size_t i = 0; i < len; i++) { - const upb_MessageDef* nested_def = - upb_MessageDef_NestedMessage(message_def, i); - upbc_Scrape_Message(s, nested_def); + upbc_Scrape_Message(s, upb_MessageDef_NestedMessage(m, i)); } } -static void upbc_Scrape_Message(upbc_State* s, - const upb_MessageDef* message_def) { - const char* name = upb_MessageDef_FullName(message_def); - const upb_StringView encoding = - upb_MiniDescriptor_EncodeMessage(message_def, s->arena); - upbc_State_Emit(s, name, encoding); +static void upbc_Scrape_Message(upbc_State* s, const upb_MessageDef* m) { + char* data; + size_t size; + bool ok = upb_MiniDescriptor_EncodeMessage(m, &data, &size, s->arena); + if (!ok) upbc_Error(s, __func__, "could not encode message"); + + upbc_State_Emit(s, upb_MessageDef_FullName(m), data, size); - upbc_Scrape_NestedEnums(s, message_def); - upbc_Scrape_NestedExtensions(s, message_def); - upbc_Scrape_NestedMessages(s, message_def); + upbc_Scrape_NestedEnums(s, m); + upbc_Scrape_NestedExtensions(s, m); + upbc_Scrape_NestedMessages(s, m); } upbc_CodeGeneratorRequest* upbc_MakeCodeGeneratorRequest(