Skip to content

Commit

Permalink
upb: upb_EnumDefs are now built using mini descriptors
Browse files Browse the repository at this point in the history
Added upb_EnumDef_IsSorted() as an optimization for presorted enum protos

PiperOrigin-RevId: 460564949
  • Loading branch information
ericsalo authored and copybara-github committed Jul 12, 2022
1 parent 3c295ec commit 9b3e873
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 105 deletions.
21 changes: 2 additions & 19 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -163,25 +163,6 @@ cc_library(
],
)

cc_library(
name = "mini_descriptor",
srcs = [
"upb/mini_descriptor.c",
],
hdrs = [
"upb/mini_descriptor.h",
],
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [
":mini_table",
":port",
":reflection",
":table_internal",
":upb",
],
)

cc_library(
name = "mini_table_internal",
hdrs = [
Expand Down Expand Up @@ -395,12 +376,14 @@ cc_library(
name = "reflection",
srcs = [
"upb/def.c",
"upb/mini_descriptor.c",
"upb/msg.h",
"upb/reflection.c",
],
hdrs = [
"upb/def.h",
"upb/def.hpp",
"upb/mini_descriptor.h",
"upb/reflection.h",
"upb/reflection.hpp",
],
Expand Down
96 changes: 28 additions & 68 deletions upb/def.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@

#include <ctype.h>
#include <errno.h>
#include <setjmp.h>
#include <stdlib.h>
#include <string.h>

#include "upb/mini_descriptor.h"
#include "upb/mini_table.h"
#include "upb/reflection.h"

Expand Down Expand Up @@ -142,9 +142,7 @@ struct upb_EnumDef {
const upb_EnumValueDef* values;
int value_count;
int32_t defaultval;
#if UINTPTR_MAX == 0xffffffff
uint32_t padding; // Increase size to a multiple of 8.
#endif
bool is_sorted; // Are all of the values defined in ascending order?
};

struct upb_EnumValueDef {
Expand Down Expand Up @@ -406,6 +404,8 @@ int32_t upb_EnumDef_Default(const upb_EnumDef* e) {

int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; }

bool upb_EnumDef_IsSorted(const upb_EnumDef* e) { return e->is_sorted; }

const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize(
const upb_EnumDef* def, const char* name, size_t len) {
upb_value v;
Expand Down Expand Up @@ -1002,8 +1002,7 @@ const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; }

/* upb_MethodDef **************************************************************/

const google_protobuf_MethodOptions* upb_MethodDef_Options(
const upb_MethodDef* m) {
const google_protobuf_MethodOptions* upb_MethodDef_Options(const upb_MethodDef* m) {
return m->opts;
}

Expand Down Expand Up @@ -1043,8 +1042,7 @@ bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) {

/* upb_ServiceDef *************************************************************/

const google_protobuf_ServiceOptions* upb_ServiceDef_Options(
const upb_ServiceDef* s) {
const google_protobuf_ServiceOptions* upb_ServiceDef_Options(const upb_ServiceDef* s) {
return s->opts;
}

Expand Down Expand Up @@ -2457,54 +2455,18 @@ static int compare_int32(const void* a_ptr, const void* b_ptr) {
return a < b ? -1 : (a == b ? 0 : 1);
}

upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
const upb_EnumDef* e) {
int n = 0;
uint64_t mask = 0;

for (int i = 0; i < e->value_count; i++) {
uint32_t val = (uint32_t)e->values[i].number;
if (val < 64) {
mask |= 1ULL << val;
} else {
n++;
}
}

int32_t* values = symtab_alloc(ctx, sizeof(*values) * n);

if (n) {
int32_t* p = values;

// Add values outside the bitmask range to the list, as described in the
// comments for upb_MiniTable_Enum.
for (int i = 0; i < e->value_count; i++) {
int32_t val = e->values[i].number;
if ((uint32_t)val >= 64) {
*p++ = val;
}
}
UPB_ASSERT(p == values + n);
}

// Enums can have duplicate values; we must sort+uniq them.
if (values) qsort(values, n, sizeof(*values), &compare_int32);

int dst = 0;
for (int i = 0; i < n; dst++) {
int32_t val = values[i];
while (i < n && values[i] == val) i++; // Skip duplicates.
values[dst] = val;
}
n = dst;

UPB_ASSERT(upb_inttable_count(&e->iton) == n + count_bits_debug(mask));

upb_MiniTable_Enum* layout = symtab_alloc(ctx, sizeof(*layout));
layout->value_count = n;
layout->mask = mask;
layout->values = values;
static upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
const upb_EnumDef* e) {
char* data;
size_t size;
bool ok = upb_MiniDescriptor_EncodeEnum(e, &data, &size, ctx->tmp_arena);
CHK_OOM(ok);

upb_Status status;
upb_MiniTable_Enum* layout =
upb_MiniTable_BuildEnum(data, size, ctx->arena, &status);
if (!layout)
symtab_errf(ctx, "Error building enum MiniTable: %s", status.msg);
return layout;
}

Expand Down Expand Up @@ -2542,7 +2504,6 @@ static void create_enumdef(
const google_protobuf_EnumDescriptorProto* enum_proto,
const upb_MessageDef* containing_type, const upb_EnumDef* _e) {
upb_EnumDef* e = (upb_EnumDef*)_e;
;
const google_protobuf_EnumValueDescriptorProto* const* values;
upb_StringView name;
size_t i, n;
Expand All @@ -2561,6 +2522,7 @@ static void create_enumdef(
CHK_OOM(upb_inttable_init(&e->iton, ctx->arena));

e->defaultval = 0;
e->is_sorted = true;
e->value_count = n;
e->values = symtab_alloc(ctx, sizeof(*e->values) * n);

Expand All @@ -2571,8 +2533,12 @@ static void create_enumdef(

SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto);

uint32_t previous = 0;
for (i = 0; i < n; i++) {
create_enumvaldef(ctx, prefix, values[i], e, i);
const uint32_t current = e->values[i].number;
if (previous > current) e->is_sorted = false;
previous = current;
}

upb_inttable_compact(&e->iton, ctx->arena);
Expand Down Expand Up @@ -2767,8 +2733,7 @@ static void resolve_extension(
symtab_errf(ctx, "extension for field '%s' had no extendee", f->full_name);
}

upb_StringView name =
google_protobuf_FieldDescriptorProto_extendee(field_proto);
upb_StringView name = google_protobuf_FieldDescriptorProto_extendee(field_proto);
const upb_MessageDef* m =
symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
f->msgdef = m;
Expand Down Expand Up @@ -2930,21 +2895,18 @@ static void build_filedef(
symtab_errf(ctx, "File has no name");
}

file->name =
strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
file->name = strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));

if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
upb_StringView package =
google_protobuf_FileDescriptorProto_package(file_proto);
upb_StringView package = google_protobuf_FileDescriptorProto_package(file_proto);
check_ident(ctx, package, true);
file->package = strviewdup(ctx, package);
} else {
file->package = NULL;
}

if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
upb_StringView syntax =
google_protobuf_FileDescriptorProto_syntax(file_proto);
upb_StringView syntax = google_protobuf_FileDescriptorProto_syntax(file_proto);

if (streql_view(syntax, "proto2")) {
file->syntax = kUpb_Syntax_Proto2;
Expand Down Expand Up @@ -2978,8 +2940,7 @@ static void build_filedef(
}
}

public_deps =
google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n);
public_deps = google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n);
file->public_dep_count = n;
file->public_deps = symtab_alloc(ctx, sizeof(*file->public_deps) * n);
int32_t* mutable_public_deps = (int32_t*)file->public_deps;
Expand All @@ -2990,8 +2951,7 @@ static void build_filedef(
mutable_public_deps[i] = public_deps[i];
}

weak_deps =
google_protobuf_FileDescriptorProto_weak_dependency(file_proto, &n);
weak_deps = google_protobuf_FileDescriptorProto_weak_dependency(file_proto, &n);
file->weak_dep_count = n;
file->weak_deps = symtab_alloc(ctx, sizeof(*file->weak_deps) * n);
int32_t* mutable_weak_deps = (int32_t*)file->weak_deps;
Expand Down
1 change: 1 addition & 0 deletions upb/def.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e);
const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e);
int32_t upb_EnumDef_Default(const upb_EnumDef* e);
int upb_EnumDef_ValueCount(const upb_EnumDef* e);
bool upb_EnumDef_IsSorted(const upb_EnumDef* e);
const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i);

const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize(
Expand Down
50 changes: 33 additions & 17 deletions upb/mini_descriptor.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,33 +137,50 @@ static int upb_MiniDescriptor_CompareFields(const void* a, const void* b) {

bool upb_MiniDescriptor_EncodeEnum(const upb_EnumDef* e, char** data,
size_t* size, upb_Arena* a) {
const size_t len = upb_EnumDef_ValueCount(e);

DescState s;
upb_DescState_Init(&s);

// Copy and sort.
const size_t len = upb_EnumDef_ValueCount(e);
const upb_EnumValueDef** sorted =
(const upb_EnumValueDef**)upb_Arena_Malloc(a, len * sizeof(void*));
if (!sorted) return false;

for (size_t i = 0; i < len; i++) {
sorted[i] = upb_EnumDef_Value(e, i);
}
qsort(sorted, len, sizeof(void*), upb_MiniDescriptor_CompareEnums);
// Duplicate values are allowed but we only encode each value once.
uint32_t previous = 0;

upb_MtDataEncoder_StartEnum(&s.e);

for (size_t i = 0; i < len; i++) {
if (!upb_DescState_Grow(&s, a)) return false;
const upb_EnumValueDef* value_def = sorted[i];
const int number = upb_EnumValueDef_Number(value_def);
s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, number);
if (upb_EnumDef_IsSorted(e)) {
// The enum is well behaved so no need to copy/sort the pointers here.
for (size_t i = 0; i < len; i++) {
const uint32_t current = upb_EnumValueDef_Number(upb_EnumDef_Value(e, i));
if (i != 0 && previous == current) continue;

if (!upb_DescState_Grow(&s, a)) return false;
s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current);
previous = current;
}
} else {
// The enum fields are unsorted.
const upb_EnumValueDef** sorted =
(const upb_EnumValueDef**)upb_Arena_Malloc(a, len * sizeof(void*));
if (!sorted) return false;

for (size_t i = 0; i < len; i++) {
sorted[i] = upb_EnumDef_Value(e, i);
}
qsort(sorted, len, sizeof(void*), upb_MiniDescriptor_CompareEnums);

for (size_t i = 0; i < len; i++) {
const uint32_t current = upb_EnumValueDef_Number(sorted[i]);
if (i != 0 && previous == current) continue;

if (!upb_DescState_Grow(&s, a)) return false;
s.ptr = upb_MtDataEncoder_PutEnumValue(&s.e, s.ptr, current);
previous = current;
}
}

if (!upb_DescState_Grow(&s, a)) return false;
s.ptr = upb_MtDataEncoder_EndEnum(&s.e, s.ptr);

upb_gfree(sorted);
*data = s.buf;
*size = s.ptr - s.buf;
return true;
Expand Down Expand Up @@ -239,7 +256,6 @@ bool upb_MiniDescriptor_EncodeMessage(const upb_MessageDef* m, char** data,
}
}

upb_gfree(sorted);
*data = s.buf;
*size = s.ptr - s.buf;
return true;
Expand Down
1 change: 0 additions & 1 deletion upbc/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ cc_binary(
":plugin_upb_proto",
":plugin_upb_proto_reflection",
"//:json",
"//:mini_descriptor",
"//:mini_table",
"//:port",
"//:reflection",
Expand Down

0 comments on commit 9b3e873

Please sign in to comment.