diff --git a/cpp/src/parquet/file/reader.cc b/cpp/src/parquet/file/reader.cc index 90200086b6597..ff619417e9a63 100644 --- a/cpp/src/parquet/file/reader.cc +++ b/cpp/src/parquet/file/reader.cc @@ -133,11 +133,24 @@ std::shared_ptr ParquetFileReader::RowGroup(int i) { // the fixed initial size is just for an example #define COL_WIDTH "20" -void ParquetFileReader::DebugPrint(std::ostream& stream, bool print_values) { +void ParquetFileReader::DebugPrint(std::ostream& stream, + std::list selected_columns, bool print_values) { stream << "File statistics:\n"; - stream << "Total rows: " << this->num_rows() << "\n"; + stream << "Total rows: " << num_rows() << "\n"; - for (int i = 0; i < num_columns(); ++i) { + if (selected_columns.size() == 0) { + for (int i = 0; i < num_columns(); i++) { + selected_columns.push_back(i); + } + } else { + for (auto i : selected_columns) { + if (i < 0 || i >= num_columns()) { + throw ParquetException("Selected column is out of range"); + } + } + } + + for (auto i : selected_columns) { const ColumnDescriptor* descr = schema_->Column(i); stream << "Column " << i << ": " << descr->name() @@ -152,9 +165,7 @@ void ParquetFileReader::DebugPrint(std::ostream& stream, bool print_values) { auto group_reader = RowGroup(r); // Print column metadata - int num_columns = group_reader->num_columns(); - - for (int i = 0; i < num_columns; ++i) { + for (auto i : selected_columns) { RowGroupStatistics stats = group_reader->GetColumnStats(i); stream << "Column " << i << ": " @@ -174,9 +185,10 @@ void ParquetFileReader::DebugPrint(std::ostream& stream, bool print_values) { static constexpr int bufsize = 25; char buffer[bufsize]; - // Create readers for all columns and print contents - vector > scanners(num_columns, NULL); - for (int i = 0; i < num_columns; ++i) { + // Create readers for selected columns and print contents + vector > scanners(selected_columns.size(), NULL); + int j = 0; + for (auto i : selected_columns) { std::shared_ptr col_reader = group_reader->Column(i); std::stringstream ss; @@ -188,17 +200,17 @@ void ParquetFileReader::DebugPrint(std::ostream& stream, bool print_values) { // This is OK in this method as long as the RowGroupReader does not get // deleted - scanners[i] = Scanner::Make(col_reader); + scanners[j++] = Scanner::Make(col_reader); } stream << "\n"; bool hasRow; do { hasRow = false; - for (int i = 0; i < num_columns; ++i) { - if (scanners[i]->HasNext()) { + for (auto scanner : scanners) { + if (scanner->HasNext()) { hasRow = true; - scanners[i]->PrintNext(stream, 17); + scanner->PrintNext(stream, 17); } } stream << "\n"; diff --git a/cpp/src/parquet/file/reader.h b/cpp/src/parquet/file/reader.h index f4455ac2a179f..3a54cfb996513 100644 --- a/cpp/src/parquet/file/reader.h +++ b/cpp/src/parquet/file/reader.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "parquet/column/page.h" @@ -119,7 +120,8 @@ class ParquetFileReader { return schema_->Column(i); } - void DebugPrint(std::ostream& stream, bool print_values = true); + void DebugPrint(std::ostream& stream, std::list selected_columns, + bool print_values = true); private: // PIMPL idiom diff --git a/cpp/src/parquet/reader-test.cc b/cpp/src/parquet/reader-test.cc index 10bcff7a7cf75..034d4e2cf7f55 100644 --- a/cpp/src/parquet/reader-test.cc +++ b/cpp/src/parquet/reader-test.cc @@ -124,13 +124,38 @@ TEST_F(TestAllTypesPlain, TestSetScannerBatchSize) { TEST_F(TestAllTypesPlain, DebugPrintWorks) { std::stringstream ss; - // Automatically parses metadata - reader_->DebugPrint(ss); + std::list columns; + reader_->DebugPrint(ss, columns); std::string result = ss.str(); ASSERT_GT(result.size(), 0); } +TEST_F(TestAllTypesPlain, ColumnSelection) { + std::stringstream ss; + + std::list columns; + columns.push_back(5); + columns.push_back(0); + columns.push_back(10); + reader_->DebugPrint(ss, columns); + + std::string result = ss.str(); + ASSERT_GT(result.size(), 0); +} + +TEST_F(TestAllTypesPlain, ColumnSelectionOutOfRange) { + std::stringstream ss; + + std::list columns; + columns.push_back(100); + ASSERT_THROW(reader_->DebugPrint(ss, columns), ParquetException); + + columns.clear(); + columns.push_back(-1); + ASSERT_THROW(reader_->DebugPrint(ss, columns), ParquetException); +} + class TestLocalFileSource : public ::testing::Test { public: