From 0969975dda352059eb5f830c5368db4144ebf8ed Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 26 Feb 2024 08:53:35 -0800 Subject: [PATCH] GH-40068: [C++] Possible data race when reading metadata of a parquet file (#40111) ### Rationale for this change The `ParquetFileFragment` will cache the parquet metadata when loading it. The `metadata()` method accesses this metadata (a shared_ptr) but does not grab the lock used to set that shared_ptr. It's possible then that we are reading a shared_ptr at the same time some other thread is setting the shared_ptr which is technically (I think) undefined behavior. ### What changes are included in this PR? Guard access to the metadata by grabbing the mutex first ### Are these changes tested? Existing tests should regress this change ### Are there any user-facing changes? No * Closes: #40068 Authored-by: Weston Pace Signed-off-by: Antoine Pitrou --- cpp/src/arrow/dataset/file_parquet.cc | 5 +++++ cpp/src/arrow/dataset/file_parquet.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index 140917a2e6341..c17ba89be7907 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -779,6 +779,11 @@ ParquetFileFragment::ParquetFileFragment(FileSource source, parquet_format_(checked_cast(*format_)), row_groups_(std::move(row_groups)) {} +std::shared_ptr ParquetFileFragment::metadata() { + auto lock = physical_schema_mutex_.Lock(); + return metadata_; +} + Status ParquetFileFragment::EnsureCompleteMetadata(parquet::arrow::FileReader* reader) { auto lock = physical_schema_mutex_.Lock(); if (metadata_ != nullptr) { diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h index 5141f36385e3f..63d8fd729223c 100644 --- a/cpp/src/arrow/dataset/file_parquet.h +++ b/cpp/src/arrow/dataset/file_parquet.h @@ -165,7 +165,7 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment { } /// \brief Return the FileMetaData associated with this fragment. - const std::shared_ptr& metadata() const { return metadata_; } + std::shared_ptr metadata(); /// \brief Ensure this fragment's FileMetaData is in memory. Status EnsureCompleteMetadata(parquet::arrow::FileReader* reader = NULLPTR);