// Patch summary (three file entries, in the order they appear below):
//
// 1. c++/src/ColumnReader.cc
//    In the loop that turns the decoded dictionary entry lengths into
//    running offsets (lengthArray[i] += lengthArray[i - 1]), each entry is
//    now checked first; an entry below zero throws
//    ParseError("Negative dictionary entry length") instead of being folded
//    into the running sum that precedes the dictionaryBlob.resize( call.
//
// 2. examples/corrupt/negative_dict_entry_lengths.orc
//    New binary example file; it is the input used by the new test case in
//    entry 3.
//
// 3. tools/test/TestFileScan.cc
//    The body of TEST(TestFileScan, testErrorHandling) is extracted into a
//    helper, checkForError(filename, error_msg), which runs tools/src/orc-scan
//    on the given file and expects exit status 1, empty standard output, and
//    error_msg to occur somewhere in the captured error text. The test now
//    calls the helper twice: once for the existing
//    stripe_footer_bad_column_encodings.orc case and once for the new
//    negative_dict_entry_lengths.orc case.
//
// NOTE(review): the diff below is stored on a single physical line, so the
// original line breaks and indentation of the hunks are not recoverable from
// this view.
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index ab526a5ac7..7a28d7424d 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -581,6 +581,9 @@ namespace orc { lengthDecoder->next(lengthArray + 1, dictSize, nullptr); lengthArray[0] = 0; for(uint32_t i = 1; i < dictSize + 1; ++i) { + if (lengthArray[i] < 0) { + throw ParseError("Negative dictionary entry length"); + } lengthArray[i] += lengthArray[i - 1]; } dictionary->dictionaryBlob.resize( diff --git a/examples/corrupt/negative_dict_entry_lengths.orc b/examples/corrupt/negative_dict_entry_lengths.orc new file mode 100644 index 0000000000..171537db99 Binary files /dev/null and b/examples/corrupt/negative_dict_entry_lengths.orc differ diff --git a/tools/test/TestFileScan.cc b/tools/test/TestFileScan.cc index b4938ea8f7..54c044d7a2 100644 --- a/tools/test/TestFileScan.cc +++ b/tools/test/TestFileScan.cc @@ -136,13 +136,18 @@ TEST (TestFileScan, testBadCommand) { EXPECT_EQ("The --batch parameter requires an integer option.\n", error); } -TEST (TestFileScan, testErrorHandling) { +void checkForError(const std::string& filename, const std::string& error_msg) { const std::string pgm = findProgram("tools/src/orc-scan"); - const std::string file = findExample("corrupt/stripe_footer_bad_column_encodings.orc"); std::string output; std::string error; - EXPECT_EQ(1, runProgram({pgm, file}, output, error)); + EXPECT_EQ(1, runProgram({pgm, filename}, output, error)); EXPECT_EQ("", output); - EXPECT_NE(std::string::npos, error.find( - "bad number of ColumnEncodings in StripeFooter: expected=6, actual=0")); + EXPECT_NE(std::string::npos, error.find(error_msg)); +} + +TEST (TestFileScan, testErrorHandling) { + checkForError(findExample("corrupt/stripe_footer_bad_column_encodings.orc"), + "bad number of ColumnEncodings in StripeFooter: expected=6, actual=0"); + checkForError(findExample("corrupt/negative_dict_entry_lengths.orc"), + "Negative dictionary entry length"); }