Skip to content

Commit

Permalink
add version/tag chars to the start of all mini descriptors
Browse files Browse the repository at this point in the history
Each version/tag character is verified during decoding. The specific tag values are just placeholders for now.

PiperOrigin-RevId: 481009599
  • Loading branch information
ericsalo authored and copybara-github committed Oct 13, 2022
1 parent df34b04 commit 36ce2fa
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 24 deletions.
83 changes: 65 additions & 18 deletions upb/mini_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ enum {
kUpb_EncodedValue_MaxEnumMask = 'A',
};

// Version tags placed at the very start of an encoded mini descriptor.
// The encoder emits the tag through upb_MtDataEncoder_PutRaw(), i.e. as the
// literal character rather than a base92-translated value, and each builder
// rejects a non-empty input whose first character is not the tag it expects.
// NOTE(review): the commit description says these specific values are
// placeholders for now.
enum {
// Written by upb_MtDataEncoder_StartEnum(); checked by
// upb_MiniTable_BuildEnum().
kUpb_EncodedVersion_EnumV1 = '!',
// Written by upb_MtDataEncoder_EncodeExtension(); checked by
// upb_MiniTable_BuildExtension().
kUpb_EncodedVersion_ExtensionV1 = '#',
// Written by upb_MtDataEncoder_StartMessage(); checked by
// upb_MiniTable_BuildWithBuf().
kUpb_EncodedVersion_MessageV1 = '$',
};

char upb_ToBase92(int8_t ch) {
static const char kUpb_ToBase92[] = {
' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/',
Expand Down Expand Up @@ -156,14 +162,19 @@ static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
return ret;
}

static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr,
char ch) {
upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize);
if (ptr == e->end) return NULL;
*ptr++ = upb_ToBase92(ch);
*ptr++ = ch;
return ptr;
}

// Translates `ch` through upb_ToBase92() and appends the resulting character
// at `ptr`. The return value is whatever upb_MtDataEncoder_PutRaw() yields
// for that write.
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
  const char encoded = upb_ToBase92(ch);
  return upb_MtDataEncoder_PutRaw(e, ptr, encoded);
}

static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
uint32_t val, int min, int max) {
int shift = _upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min) + 1);
Expand Down Expand Up @@ -195,6 +206,10 @@ char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
in->state.msg_state.msg_modifiers = 0;
in->state.msg_state.last_field_num = 0;
in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;

ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1);
if (!ptr) return NULL;

return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod);
}

Expand All @@ -204,6 +219,10 @@ char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
in->state.msg_state.msg_modifiers = msg_mod;
in->state.msg_state.last_field_num = 0;
in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;

ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1);
if (!ptr) return NULL;

return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod);
}

Expand Down Expand Up @@ -303,10 +322,12 @@ char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
return ptr;
}

void upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e) {
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, NULL);
char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) {
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
in->state.enum_state.present_values_mask = 0;
in->state.enum_state.last_written_value = 0;

return upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1);
}

static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
Expand Down Expand Up @@ -1021,6 +1042,16 @@ upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
goto done;
}

// If the string is non-empty then it must begin with a version tag.
if (len) {
if (*data != kUpb_EncodedVersion_MessageV1) {
upb_MtDecoder_ErrorFormat(&decoder, "Invalid message version: %c", *data);
UPB_UNREACHABLE();
}
data++;
len--;
}

upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table);

decoder.table->size = 0;
Expand Down Expand Up @@ -1141,7 +1172,7 @@ static void upb_MiniTable_BuildEnumValue(upb_MtDecoder* d, uint32_t val) {
upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
upb_Arena* arena,
upb_Status* status) {
upb_MtDecoder d = {
upb_MtDecoder decoder = {
.enum_table = upb_Arena_Malloc(arena, upb_MiniTable_EnumSize(2)),
.enum_value_count = 0,
.enum_data_count = 0,
Expand All @@ -1151,33 +1182,41 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
.arena = arena,
};

if (UPB_SETJMP(d.err)) {
return NULL;
if (UPB_SETJMP(decoder.err)) return NULL;

// If the string is non-empty then it must begin with a version tag.
if (len) {
if (*data != kUpb_EncodedVersion_EnumV1) {
upb_MtDecoder_ErrorFormat(&decoder, "Invalid enum version: %c", *data);
UPB_UNREACHABLE();
}
data++;
len--;
}

upb_MtDecoder_CheckOutOfMemory(&d, d.enum_table);
upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.enum_table);

// Guarantee at least 64 bits of mask without checking mask size.
d.enum_table->mask_limit = 64;
d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0);
d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0);
decoder.enum_table->mask_limit = 64;
decoder.enum_table = _upb_MiniTable_AddEnumDataMember(&decoder, 0);
decoder.enum_table = _upb_MiniTable_AddEnumDataMember(&decoder, 0);

d.enum_table->value_count = 0;
decoder.enum_table->value_count = 0;

const char* ptr = data;
uint32_t base = 0;

while (ptr < d.end) {
while (ptr < decoder.end) {
char ch = *ptr++;
if (ch <= kUpb_EncodedValue_MaxEnumMask) {
uint32_t mask = upb_FromBase92(ch);
for (int i = 0; i < 5; i++, base++, mask >>= 1) {
if (mask & 1) upb_MiniTable_BuildEnumValue(&d, base);
if (mask & 1) upb_MiniTable_BuildEnumValue(&decoder, base);
}
} else if (kUpb_EncodedValue_MinSkip <= ch &&
ch <= kUpb_EncodedValue_MaxSkip) {
uint32_t skip;
ptr = upb_MiniTable_DecodeBase92Varint(&d, ptr, ch,
ptr = upb_MiniTable_DecodeBase92Varint(&decoder, ptr, ch,
kUpb_EncodedValue_MinSkip,
kUpb_EncodedValue_MaxSkip, &skip);
base += skip;
Expand All @@ -1187,7 +1226,7 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
}
}

return d.enum_table;
return decoder.enum_table;
}

const char* upb_MiniTable_BuildExtension(const char* data, size_t len,
Expand All @@ -1201,8 +1240,16 @@ const char* upb_MiniTable_BuildExtension(const char* data, size_t len,
.table = NULL,
};

if (UPB_SETJMP(decoder.err)) {
return NULL;
if (UPB_SETJMP(decoder.err)) return NULL;

// If the string is non-empty then it must begin with a version tag.
if (len) {
if (*data != kUpb_EncodedVersion_ExtensionV1) {
upb_MtDecoder_ErrorFormat(&decoder, "Invalid ext version: %c", *data);
UPB_UNREACHABLE();
}
data++;
len--;
}

uint16_t count = 0;
Expand Down
4 changes: 2 additions & 2 deletions upb/mini_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr);
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
uint32_t field_num);

// Encodes the set of values for a given enum. The values must be given in
// Encodes the set of values for a given enum. The values must be given in
// order (after casting to uint32_t), and repeats are not allowed.
void upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e);
char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr);
char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
uint32_t val);
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr);
Expand Down
5 changes: 4 additions & 1 deletion upb/mini_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ class MtDataEncoder {
});
}

void StartEnum() { upb_MtDataEncoder_StartEnum(&encoder_); }
bool StartEnum() {
return appender_(
[=](char* buf) { return upb_MtDataEncoder_StartEnum(&encoder_, buf); });
}

bool PutEnumValue(uint32_t enum_value) {
return appender_([=](char* buf) {
Expand Down
4 changes: 2 additions & 2 deletions upb/mini_table_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ TEST(MiniTableEnumTest, Enum) {
upb::Arena arena;
upb::MtDataEncoder e;

e.StartEnum();
ASSERT_TRUE(e.StartEnum());
absl::flat_hash_set<int32_t> values;
for (int i = 0; i < 256; i++) {
values.insert(i * 2);
Expand Down Expand Up @@ -256,7 +256,7 @@ TEST(MiniTableEnumTest, PositiveAndNegative) {
upb::Arena arena;
upb::MtDataEncoder e;

e.StartEnum();
ASSERT_TRUE(e.StartEnum());
absl::flat_hash_set<int32_t> values;
for (int i = 0; i < 100; i++) {
values.insert(i);
Expand Down
3 changes: 2 additions & 1 deletion upb/reflection/enum_def.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a,
if (!sorted) return false;
}

upb_MtDataEncoder_StartEnum(&s.e);
if (!_upb_DescState_Grow(&s, a)) return false;
s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr);

// Duplicate values are allowed but we only encode each value once.
uint32_t previous = 0;
Expand Down

0 comments on commit 36ce2fa

Please sign in to comment.