Skip to content

Commit

Permalink
Added function for getting the type of a MiniTable field
Browse files Browse the repository at this point in the history
Prior to this CL, users relied on `field->descriptortype` to get the field type. This almost works: `field->descriptortype` usually matches the field's true type, but not always. In two special cases we deviate from the true field type, for ease of parsing and serialization:

- For open enums, we use `kUpb_FieldType_Int32` instead of `kUpb_FieldType_Enum`, because from the perspective of the wire format, an open enum field is equivalent to int32.
- For proto2 strings, we use `kUpb_FieldType_Bytes` instead of `kUpb_FieldType_String`, because proto2 strings do not perform UTF-8 validation, which makes them equivalent to bytes.

In this CL we add a public API function:

```
// Returns the true field type for this field.
upb_FieldType upb_MiniTableField_Type(const upb_MiniTable_Field* f);
```

This will provide the actual field type for this field.

Note that this CL changes the MiniDescriptor format.  Previously MiniDescriptors did not contain enough information to distinguish between Enum/Int32.  To remedy this we added a new encoded field type, `kUpb_EncodedType_ClosedEnum`.

PiperOrigin-RevId: 479387672
  • Loading branch information
haberman authored and copybara-github committed Oct 6, 2022
1 parent 30a28f3 commit b33fd88
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 13 deletions.
57 changes: 44 additions & 13 deletions upb/mini_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ typedef enum {
kUpb_EncodedType_String = 15,
kUpb_EncodedType_Group = 16,
kUpb_EncodedType_Message = 17,
kUpb_EncodedType_ClosedEnum = 18,

kUpb_EncodedType_RepeatedBase = 20,
} upb_EncodedType;

typedef enum {
kUpb_EncodedFieldModifier_FlipPacked = 1 << 0,
kUpb_EncodedFieldModifier_IsClosedEnum = 1 << 1,
// upb only.
kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2,
kUpb_EncodedFieldModifier_IsRequired = 1 << 3,
Expand Down Expand Up @@ -236,12 +236,11 @@ char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
uint32_t encoded_modifiers = 0;

// Put field type.
if (type == kUpb_FieldType_Enum &&
!(field_mod & kUpb_FieldModifier_IsClosedEnum)) {
type = kUpb_FieldType_Int32;
}

int encoded_type = kUpb_TypeToEncoded[type];
if (field_mod & kUpb_FieldModifier_IsClosedEnum) {
UPB_ASSERT(type == kUpb_FieldType_Enum);
encoded_type = kUpb_EncodedType_ClosedEnum;
}
if (field_mod & kUpb_FieldModifier_IsRepeated) {
// Repeated fields shift the type number up (unlike other modifiers which
// are bit flags).
Expand Down Expand Up @@ -352,6 +351,19 @@ const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
return NULL;
}

// Returns the true field type of `field`.
//
// `field->descriptortype` normally holds the field type directly. When the
// kUpb_LabelFlags_IsAlternate bit is set in `field->mode`, the stored type is
// a stand-in and is mapped back to the real one:
//   - Int32 -> Enum
//   - Bytes -> String
upb_FieldType upb_MiniTableField_Type(const upb_MiniTable_Field* field) {
  // Common case: the stored descriptor type is already the real type.
  if (!(field->mode & kUpb_LabelFlags_IsAlternate)) {
    return field->descriptortype;
  }

  switch (field->descriptortype) {
    case kUpb_FieldType_Int32:
      return kUpb_FieldType_Enum;
    case kUpb_FieldType_Bytes:
      return kUpb_FieldType_String;
    default:
      // No other stored type is expected to carry the alternate flag. If
      // assertions are compiled out, fall back to the stored type.
      UPB_ASSERT(false);
      return field->descriptortype;
  }
}

/** Data decoder **************************************************************/

// Note: we sort by this number when calculating layout order.
Expand Down Expand Up @@ -457,6 +469,7 @@ static bool upb_MiniTable_HasSub(upb_MiniTable_Field* field,
case kUpb_FieldType_String:
if (!(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) {
field->descriptortype = kUpb_FieldType_Bytes;
field->mode |= kUpb_LabelFlags_IsAlternate;
}
return false;
default:
Expand All @@ -471,8 +484,16 @@ static bool upb_MtDecoder_FieldIsPackable(upb_MiniTable_Field* field) {

static void upb_MiniTable_SetTypeAndSub(upb_MiniTable_Field* field,
upb_FieldType type, uint32_t* sub_count,
uint64_t msg_modifiers) {
uint64_t msg_modifiers,
bool is_proto3_enum) {
field->descriptortype = type;

if (is_proto3_enum) {
UPB_ASSERT(field->descriptortype == kUpb_FieldType_Enum);
field->descriptortype = kUpb_FieldType_Int32;
field->mode |= kUpb_LabelFlags_IsAlternate;
}

if (upb_MiniTable_HasSub(field, msg_modifiers)) {
field->submsg_index = sub_count ? (*sub_count)++ : 0;
} else {
Expand Down Expand Up @@ -506,6 +527,7 @@ static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
[kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
[kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
[kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
[kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte,
};

static const char kUpb_EncodedToType[] = {
Expand All @@ -527,6 +549,7 @@ static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
[kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
[kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
[kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
[kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum,
};

char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit
Expand Down Expand Up @@ -556,7 +579,7 @@ static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
UPB_UNREACHABLE();
}
upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_count,
msg_modifiers);
msg_modifiers, type == kUpb_EncodedType_Enum);
}

static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
Expand Down Expand Up @@ -1027,11 +1050,18 @@ upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
if (!ret || !fields) return NULL;

upb_MiniTable_Sub* subs = NULL;
if (value_is_proto3_enum) value_type = kUpb_FieldType_Int32;
if (value_type == kUpb_FieldType_Message ||
value_type == kUpb_FieldType_Group || value_type == kUpb_FieldType_Enum) {
uint64_t value_modifiers = 0;
if (value_is_proto3_enum) {
UPB_ASSERT(value_type == kUpb_FieldType_Enum);
// No sub needed.
} else if (value_type == kUpb_FieldType_Message ||
value_type == kUpb_FieldType_Group ||
value_type == kUpb_FieldType_Enum) {
subs = upb_Arena_Malloc(arena, sizeof(*subs));
if (!subs) return NULL;
if (value_type == kUpb_FieldType_Enum) {
value_modifiers |= kUpb_FieldModifier_IsClosedEnum;
}
}

size_t field_size =
Expand All @@ -1046,8 +1076,9 @@ upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
fields[0].offset = 0;
fields[1].offset = field_size;

upb_MiniTable_SetTypeAndSub(&fields[0], key_type, NULL, 0);
upb_MiniTable_SetTypeAndSub(&fields[1], value_type, NULL, 0);
upb_MiniTable_SetTypeAndSub(&fields[0], key_type, NULL, 0, false);
upb_MiniTable_SetTypeAndSub(&fields[1], value_type, NULL, 0,
value_is_proto3_enum);

ret->size = UPB_ALIGN_UP(2 * field_size, 8);
ret->field_count = 2;
Expand Down
2 changes: 2 additions & 0 deletions upb/mini_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ extern "C" {
const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
const upb_MiniTable* table, uint32_t number);

// Returns the true field type for this field. This can differ from
// `field->descriptortype`, which stores Int32 for open enums and Bytes for
// strings that are not UTF-8 validated.
upb_FieldType upb_MiniTableField_Type(const upb_MiniTable_Field* field);

UPB_INLINE const upb_MiniTable* upb_MiniTable_GetSubMessageTable(
const upb_MiniTable* mini_table, const upb_MiniTable_Field* field) {
return mini_table->subs[field->submsg_index].submsg;
Expand Down
6 changes: 6 additions & 0 deletions upb/msg_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ typedef enum {
// Flag bits that are OR'ed into a field's `mode`.
typedef enum {
  kUpb_LabelFlags_IsPacked = 1 << 2,
  kUpb_LabelFlags_IsExtension = 1 << 3,
  // The stored descriptor type is an "alternate type", i.e. a stand-in for
  // the field's actual type:
  //   - Int32 stands in for Enum (an open enum needs no value check, so it
  //     was rewritten to Int32).
  //   - Bytes stands in for String (a string that needs no UTF-8 check).
  kUpb_LabelFlags_IsAlternate = 1 << 4,
} upb_LabelFlags;

// Note: we sort by this number when calculating layout order.
Expand Down
9 changes: 9 additions & 0 deletions upb/reflection/message_def.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,15 @@ void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) {
if (!m->layout) _upb_DefBuilder_OomErr(ctx);
}

#ifndef NDEBUG
for (int i = 0; i < m->field_count; i++) {
const upb_FieldDef* f = upb_MessageDef_Field(m, i);
const upb_MiniTable_Field* mt_f =
&m->layout->fields[_upb_FieldDef_LayoutIndex(f)];
UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f));
}
#endif

m->in_message_set = false;
for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) {
upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i);
Expand Down
4 changes: 4 additions & 0 deletions upbc/protoc-gen-upb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1246,6 +1246,10 @@ std::string GetModeInit(uint8_t mode32, uint8_t mode64) {
absl::StrAppend(&ret, " | kUpb_LabelFlags_IsExtension");
}

if (mode32 & kUpb_LabelFlags_IsAlternate) {
absl::StrAppend(&ret, " | kUpb_LabelFlags_IsAlternate");
}

std::string rep;
switch (mode32 >> kUpb_FieldRep_Shift) {
case kUpb_FieldRep_1Byte:
Expand Down

0 comments on commit b33fd88

Please sign in to comment.