Skip to content

Commit

Permalink
fix panic in ParquetMetadata::memory_size: check has_min_max_set be…
Browse files Browse the repository at this point in the history
…fore invoking min()/max() (#6092)

* fix: check has_min_max_set before invoking min()/max()

* chore: add unit test for statistics heap size

* Fixup test

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
Fischer0522 and alamb authored Jul 20, 2024
1 parent 16915b5 commit ee56940
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
9 changes: 8 additions & 1 deletion parquet/src/file/metadata/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,14 @@ impl<T: ParquetValueType> HeapSize for PageIndex<T> {

impl<T: ParquetValueType> HeapSize for ValueStatistics<T> {
fn heap_size(&self) -> usize {
self.min().heap_size() + self.max().heap_size()
if self.has_min_max_set() {
return self.min().heap_size() + self.max().heap_size();
} else if self.min_is_exact() {
return self.min().heap_size();
} else if self.max_is_exact() {
return self.max().heap_size();
}
0
}
}
impl HeapSize for bool {
Expand Down
32 changes: 28 additions & 4 deletions parquet/src/file/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1291,7 +1291,11 @@ mod tests {
let columns = schema_descr
.columns()
.iter()
.map(|column_descr| ColumnChunkMetaData::builder(column_descr.clone()).build())
.map(|column_descr| {
ColumnChunkMetaData::builder(column_descr.clone())
.set_statistics(Statistics::new::<i32>(None, None, None, 0, false))
.build()
})
.collect::<Result<Vec<_>>>()
.unwrap();
let row_group_meta = RowGroupMetaData::builder(schema_descr.clone())
Expand All @@ -1317,11 +1321,31 @@ mod tests {
num_rows,
created_by,
key_value_metadata,
schema_descr,
schema_descr.clone(),
column_orders,
);
let parquet_meta = ParquetMetaData::new(file_metadata.clone(), row_group_meta.clone());
let base_expected_size = 1320;

// Now, add in Exact Statistics
let columns_with_stats = schema_descr
.columns()
.iter()
.map(|column_descr| {
ColumnChunkMetaData::builder(column_descr.clone())
.set_statistics(Statistics::new::<i32>(Some(0), Some(100), None, 0, false))
.build()
})
.collect::<Result<Vec<_>>>()
.unwrap();

let row_group_meta_with_stats = RowGroupMetaData::builder(schema_descr)
.set_num_rows(1000)
.set_column_metadata(columns_with_stats)
.build()
.unwrap();
let row_group_meta_with_stats = vec![row_group_meta_with_stats];

let parquet_meta = ParquetMetaData::new(file_metadata.clone(), row_group_meta_with_stats);
let base_expected_size = 2024;
assert_eq!(parquet_meta.memory_size(), base_expected_size);

let mut column_index = ColumnIndexBuilder::new();
Expand Down

0 comments on commit ee56940

Please sign in to comment.