From 319547923e2b3d7f12c7d9c83221f438cc15e8f9 Mon Sep 17 00:00:00 2001 From: stiga-huang Date: Thu, 2 Jan 2020 17:29:21 +0800 Subject: [PATCH] ORC-581:[C++] Verify fieldNames size for STRUCT types For STRUCT types, fieldNames size can be different than subTypes size in a corrupt file. We should verify it to avoid crash. --- c++/src/Reader.cc | 15 +++++++++++---- c++/test/TestType.cc | 29 +++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index 7957bda454..4d39969bd5 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -1008,10 +1008,11 @@ namespace orc { } /** - * Check that indices in the type tree are valid, so we won't crash - * when we convert the proto::Types to TypeImpls. + * Check that proto Types are valid. Indices in the type tree should be valid, + * so we won't crash when we convert the proto::Types to TypeImpls (ORC-317). + * For STRUCT types, fieldName size should match subTypes size (ORC-581). */ - void checkProtoTypeIds(const proto::Footer &footer) { + void checkProtoTypes(const proto::Footer &footer) { std::stringstream msg; int maxId = footer.types_size(); if (maxId <= 0) { @@ -1019,6 +1020,12 @@ namespace orc { } for (int i = 0; i < maxId; ++i) { const proto::Type& type = footer.types(i); + if (type.kind() == proto::Type_Kind_STRUCT + && type.subtypes_size() != type.fieldnames_size()) { + msg << "Footer is corrupt: STRUCT type " << i << " has " << type.subtypes_size() + << " subTypes, but has " << type.fieldnames_size() << " fieldNames"; + throw ParseError(msg.str()); + } for (int j = 0; j < type.subtypes_size(); ++j) { int subTypeId = static_cast(type.subtypes(j)); if (subTypeId <= i) { @@ -1070,7 +1077,7 @@ namespace orc { stream->getName()); } - checkProtoTypeIds(*footer); + checkProtoTypes(*footer); return REDUNDANT_MOVE(footer); } diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc index 5d61f6f3ce..e70a9ef029 100644 --- a/c++/test/TestType.cc +++ b/c++/test/TestType.cc @@ -343,7 +343,7 @@ namespace orc { void expectParseError(const proto::Footer &footer, const char* errMsg) { try { - checkProtoTypeIds(footer); + checkProtoTypes(footer); FAIL() << "Should throw ParseError for ill ids"; } catch (ParseError& e) { EXPECT_EQ(e.what(), std::string(errMsg)); @@ -352,22 +352,26 @@ namespace orc { } } - TEST(TestType, testCheckProtoTypeIds) { + TEST(TestType, testCheckProtoTypes) { proto::Footer footer; proto::Type rootType; expectParseError(footer, "Footer is corrupt: no types found"); rootType.set_kind(proto::Type_Kind_STRUCT); rootType.add_subtypes(1); // add a non existent type id + rootType.add_fieldnames("f1"); *(footer.add_types()) = rootType; expectParseError(footer, "Footer is corrupt: types(1) not exists"); footer.clear_types(); rootType.clear_subtypes(); + rootType.clear_fieldnames(); proto::Type structType; structType.set_kind(proto::Type_Kind_STRUCT); structType.add_subtypes(0); // construct a loop back to root + structType.add_fieldnames("root"); rootType.add_subtypes(1); + rootType.add_fieldnames("f1"); *(footer.add_types()) = rootType; *(footer.add_types()) = structType; expectParseError(footer, @@ -375,12 +379,14 @@ namespace orc { footer.clear_types(); rootType.clear_subtypes(); + rootType.clear_fieldnames(); proto::Type listType; listType.set_kind(proto::Type_Kind_LIST); proto::Type mapType; mapType.set_kind(proto::Type_Kind_MAP); proto::Type unionType; unionType.set_kind(proto::Type_Kind_UNION); + rootType.add_fieldnames("f1"); rootType.add_subtypes(1); // 0 -> 1 listType.add_subtypes(2); // 1 -> 2 mapType.add_subtypes(3); // 2 -> 3 @@ -394,16 +400,35 @@ namespace orc { footer.clear_types(); rootType.clear_subtypes(); + rootType.clear_fieldnames(); proto::Type intType; intType.set_kind(proto::Type_Kind_INT); proto::Type strType; strType.set_kind(proto::Type_Kind_STRING); rootType.add_subtypes(2); + rootType.add_fieldnames("f2"); rootType.add_subtypes(1); + rootType.add_fieldnames("f1"); *(footer.add_types()) = rootType; *(footer.add_types()) = intType; *(footer.add_types()) = strType; expectParseError(footer, "Footer is corrupt: subType(0) >= subType(1) in types(0). (2 >= 1)"); + + footer.clear_types(); + rootType.clear_subtypes(); + rootType.clear_fieldnames(); + rootType.set_kind(proto::Type_Kind_STRUCT); + rootType.add_subtypes(1); + *(footer.add_types()) = rootType; + *(footer.add_types()) = intType; + expectParseError(footer, + "Footer is corrupt: STRUCT type 0 has 1 subTypes, but has 0 fieldNames"); + // Should pass the check after adding the field name + footer.clear_types(); + rootType.add_fieldnames("f1"); + *(footer.add_types()) = rootType; + *(footer.add_types()) = intType; + checkProtoTypes(footer); } }