Skip to content

Commit

Permalink
More decoder fixes, and slightly changed parse call semantics.
Browse files Browse the repository at this point in the history
Prior to this change, if an error was returned, it would be
guaranteed to always return a short byte count.  Now the two
concepts are a bit more orthogonal.  There are cases where
the entire input is consumed even though an error was
encountered.
  • Loading branch information
haberman committed Aug 12, 2015
1 parent fe42734 commit 8544010
Show file tree
Hide file tree
Showing 6 changed files with 379 additions and 337 deletions.
48 changes: 46 additions & 2 deletions tests/pb/test_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,16 @@ string submsg(uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
}

// Like delim()/submsg(), but intentionally encodes an incorrect length.
// These help test when a delimited boundary doesn't land in the right place.
string badlen_delim(int err, const string& buf) {
return cat(varint(buf.size() + err), buf);
}

string badlen_submsg(int err, uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), badlen_delim(err, buf) );
}


/* A set of handlers that covers all .proto types *****************************/

Expand Down Expand Up @@ -436,6 +446,21 @@ upb::reffed_ptr<const upb::MessageDef> NewMessageDef() {
ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL));

f = upb::FieldDef::New();
ASSERT(f->set_name("f_group", NULL));
ASSERT(f->set_number(UPB_DESCRIPTOR_TYPE_GROUP, NULL));
f->set_descriptor_type(UPB_DESCRIPTOR_TYPE_GROUP);
ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL));

f = upb::FieldDef::New();
ASSERT(f->set_name("r_group", NULL));
ASSERT(f->set_number(rep_fn(UPB_DESCRIPTOR_TYPE_GROUP), NULL));
f->set_label(UPB_LABEL_REPEATED);
f->set_descriptor_type(UPB_DESCRIPTOR_TYPE_GROUP);
ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL));

upb::reffed_ptr<upb::EnumDef> e = upb::EnumDef::New();
ASSERT(e->AddValue("FOO", 1, NULL));
ASSERT(e->Freeze(NULL));
Expand Down Expand Up @@ -492,6 +517,8 @@ upb::reffed_ptr<const upb::Handlers> NewHandlers(TestMode mode) {
// to this type, eg: message M { optional M m = 1; }
reg_subm(h.get(), UPB_DESCRIPTOR_TYPE_MESSAGE);
reg_subm(h.get(), rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE));
reg_subm(h.get(), UPB_DESCRIPTOR_TYPE_GROUP);
reg_subm(h.get(), rep_fn(UPB_DESCRIPTOR_TYPE_GROUP));

// For NOP_FIELD we register no handlers, so we can pad a proto freely without
// changing the output.
Expand Down Expand Up @@ -645,6 +672,12 @@ void assert_successful_parse(const string& proto,

void assert_does_not_parse_at_eof(const string& proto) {
run_decoder(proto, NULL);

// Also test that we fail to parse at end-of-submessage, not just
// end-of-message.
run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), thirty_byte_nop),
NULL);
}

void assert_does_not_parse(const string& proto) {
Expand Down Expand Up @@ -859,6 +892,18 @@ void test_invalid() {
tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP)),
tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_END_GROUP)));

// Unknown string extends past enclosing submessage.
assert_does_not_parse(
cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
submsg(12345, string(" "))),
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));

// Unknown fixed-length field extends past enclosing submessage.
assert_does_not_parse(
cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(0))),
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));

// Test exceeding the resource limit of stack depth.
string buf;
for (int i = 0; i <= MAX_NESTING; i++) {
Expand Down Expand Up @@ -942,8 +987,7 @@ void test_valid() {

// Unknown field inside a known submessage.
assert_successful_parse(
cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
submsg(12345, string(" "))),
cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
varint(5)),
LINE("<")
Expand Down
15 changes: 0 additions & 15 deletions tests/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,21 +117,6 @@ class VerboseParserEnvironment {
}
}

if (status_.ok() != (parsed >= bytes)) {
if (status_.ok()) {
fprintf(stderr,
"Error: decode function returned short byte count but set no "
"error status\n");
} else {
fprintf(stderr,
"Error: decode function returned complete byte count but set "
"error status\n");
}
fprintf(stderr, "Status: %s, parsed=%u, bytes=%u\n",
status_.error_message(), (unsigned)parsed, (unsigned)bytes);
ASSERT(false);
}

if (!status_.ok())
return false;

Expand Down
13 changes: 11 additions & 2 deletions upb/pb/compile_decoder_x64.dasc
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ static void emit_static_asm(jitcompiler *jc) {
| mov rcx, DELIMEND
| sub rcx, PTR
| sub rcx, rdx
| jb ->err // Len is greater than enclosing message.
| jb >4 // Len is greater than enclosing message.
| mov FRAME->end_ofs, rcx
| cmp FRAME, DECODER->limit
| je >3 // Stack overflow
Expand All @@ -319,14 +319,23 @@ static void emit_static_asm(jitcompiler *jc) {
|2:
| ret
|3:
| // Error -- call seterr.
| // Stack overflow error.
| mov PTR, DECODER->checkpoint // Rollback to before the delim len.
| // Prepare seterr args.
| mov ARG1_64, DECODER
| ld64 kPbDecoderStackOverflow
| callp upb_pbdecoder_seterr
| call ->suspend
| jmp <1
|4:
| // Overextended len.
| mov PTR, DECODER->checkpoint // Rollback to before the delim len.
| // Prepare seterr args.
| mov ARG1_64, DECODER
| ld64 kPbDecoderSubmessageTooLong
| callp upb_pbdecoder_seterr
| call ->suspend
| jmp <1
|
| // For getting a value that spans a buffer seam. Falls back to C.
|.macro getvalue_slow, func, bytes
Expand Down
Loading

0 comments on commit 8544010

Please sign in to comment.