diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d44462236b2..5a8d9f54673 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -143,9 +143,11 @@ repos: hooks: - id: verify-copyright exclude: | - (?x) - cpp/include/cudf_test/cxxopts[.]hpp$ - + (?x)^( + cpp/include/cudf_test/cxxopts[.]hpp$| + cpp/src/io/parquet/ipc/Message_generated[.]h$| + cpp/src/io/parquet/ipc/Schema_generated[.]h$ + ) default_language_version: python: python3 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f11f3fc3c9a..474269364de 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -192,6 +192,8 @@ include(cmake/thirdparty/get_cccl.cmake) include(cmake/thirdparty/get_rmm.cmake) # find arrow include(cmake/thirdparty/get_arrow.cmake) +# find flatbuffers +include(cmake/thirdparty/get_flatbuffers.cmake) # find dlpack include(cmake/thirdparty/get_dlpack.cmake) # find cuCollections, should come after including CCCL @@ -429,6 +431,7 @@ add_library( src/io/text/bgzip_utils.cpp src/io/text/multibyte_split.cu src/io/utilities/arrow_io_source.cpp + src/io/utilities/base64_utilities.cpp src/io/utilities/column_buffer.cpp src/io/utilities/column_buffer_strings.cu src/io/utilities/config_utils.cpp @@ -742,6 +745,7 @@ target_include_directories( "$" PRIVATE "$" "$" + "$" INTERFACE "$" ) diff --git a/cpp/cmake/thirdparty/get_flatbuffers.cmake b/cpp/cmake/thirdparty/get_flatbuffers.cmake new file mode 100644 index 00000000000..b0ece38b8ef --- /dev/null +++ b/cpp/cmake/thirdparty/get_flatbuffers.cmake @@ -0,0 +1,33 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Use CPM to find or clone flatbuffers +function(find_and_configure_flatbuffers VERSION) + + rapids_cpm_find( + flatbuffers ${VERSION} + GLOBAL_TARGETS flatbuffers + CPM_ARGS + GIT_REPOSITORY https://github.com/google/flatbuffers.git + GIT_TAG v${VERSION} + GIT_SHALLOW TRUE + ) + + rapids_export_find_package_root( + BUILD flatbuffers "${flatbuffers_BINARY_DIR}" EXPORT_SET cudf-exports + ) + +endfunction() + +find_and_configure_flatbuffers(24.3.25) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 8bfcacdb47f..7f034668e43 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -71,6 +71,8 @@ class parquet_reader_options { bool _convert_strings_to_categories = false; // Whether to use PANDAS metadata to load columns bool _use_pandas_metadata = true; + // Whether to read and use ARROW schema + bool _use_arrow_schema = true; // Cast timestamp columns to a specific type data_type _timestamp_type{type_id::EMPTY}; @@ -126,6 +128,13 @@ class parquet_reader_options { */ [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } + /** + * @brief Returns true/false depending on whether to use arrow schema while reading. + * + * @return `true` if arrow schema is used while reading + */ + [[nodiscard]] bool is_enabled_use_arrow_schema() const { return _use_arrow_schema; } + /** * @brief Returns optional tree of metadata. 
* @@ -214,6 +223,13 @@ class parquet_reader_options { */ void enable_use_pandas_metadata(bool val) { _use_pandas_metadata = val; } + /** + * @brief Sets to enable/disable use of arrow schema to read. + * + * @param val Boolean value whether to use arrow schema + */ + void enable_use_arrow_schema(bool val) { _use_arrow_schema = val; } + /** * @brief Sets reader column schema. * @@ -328,6 +344,18 @@ class parquet_reader_options_builder { return *this; } + /** + * @brief Sets to enable/disable use of arrow schema to read. + * + * @param val Boolean value whether to use arrow schema + * @return this for chaining + */ + parquet_reader_options_builder& use_arrow_schema(bool val) + { + options._use_arrow_schema = val; + return *this; + } + /** * @brief Sets reader metadata. * diff --git a/cpp/src/io/parquet/ipc/Message_generated.h b/cpp/src/io/parquet/ipc/Message_generated.h new file mode 100644 index 00000000000..8ddd859f51c --- /dev/null +++ b/cpp/src/io/parquet/ipc/Message_generated.h @@ -0,0 +1,651 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_MESSAGE_CUDF_IO_PARQUET_FLATBUF_H_ +#define FLATBUFFERS_GENERATED_MESSAGE_CUDF_IO_PARQUET_FLATBUF_H_ + +#include + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. 
+static_assert(FLATBUFFERS_VERSION_MAJOR == 24 && FLATBUFFERS_VERSION_MINOR == 3 && + FLATBUFFERS_VERSION_REVISION == 25, + "Non-compatible flatbuffers version included"); + +#include "Schema_generated.h" + +namespace cudf { +namespace io { +namespace parquet { +namespace flatbuf { + +struct FieldNode; + +struct BodyCompression; +struct BodyCompressionBuilder; + +struct RecordBatch; +struct RecordBatchBuilder; + +struct DictionaryBatch; +struct DictionaryBatchBuilder; + +struct Message; +struct MessageBuilder; + +enum CompressionType : int8_t { + CompressionType_LZ4_FRAME = 0, + CompressionType_ZSTD = 1, + CompressionType_MIN = CompressionType_LZ4_FRAME, + CompressionType_MAX = CompressionType_ZSTD +}; + +inline const CompressionType (&EnumValuesCompressionType())[2] +{ + static const CompressionType values[] = {CompressionType_LZ4_FRAME, CompressionType_ZSTD}; + return values; +} + +inline const char* const* EnumNamesCompressionType() +{ + static const char* const names[3] = {"LZ4_FRAME", "ZSTD", nullptr}; + return names; +} + +inline const char* EnumNameCompressionType(CompressionType e) +{ + if (::flatbuffers::IsOutRange(e, CompressionType_LZ4_FRAME, CompressionType_ZSTD)) return ""; + const size_t index = static_cast(e); + return EnumNamesCompressionType()[index]; +} + +/// Provided for forward compatibility in case we need to support different +/// strategies for compressing the IPC message body (like whole-body +/// compression rather than buffer-level) in the future +enum BodyCompressionMethod : int8_t { + /// Each constituent buffer is first compressed with the indicated + /// compressor, and then written with the uncompressed length in the first 8 + /// bytes as a 64-bit little-endian signed integer followed by the compressed + /// buffer bytes (and then padding as required by the protocol). 
The + /// uncompressed length may be set to -1 to indicate that the data that + /// follows is not compressed, which can be useful for cases where + /// compression does not yield appreciable savings. + BodyCompressionMethod_BUFFER = 0, + BodyCompressionMethod_MIN = BodyCompressionMethod_BUFFER, + BodyCompressionMethod_MAX = BodyCompressionMethod_BUFFER +}; + +inline const BodyCompressionMethod (&EnumValuesBodyCompressionMethod())[1] +{ + static const BodyCompressionMethod values[] = {BodyCompressionMethod_BUFFER}; + return values; +} + +inline const char* const* EnumNamesBodyCompressionMethod() +{ + static const char* const names[2] = {"BUFFER", nullptr}; + return names; +} + +inline const char* EnumNameBodyCompressionMethod(BodyCompressionMethod e) +{ + if (::flatbuffers::IsOutRange(e, BodyCompressionMethod_BUFFER, BodyCompressionMethod_BUFFER)) + return ""; + const size_t index = static_cast(e); + return EnumNamesBodyCompressionMethod()[index]; +} + +/// ---------------------------------------------------------------------- +/// The root Message type +/// This union enables us to easily send different message types without +/// redundant storage, and in the future we can easily add new message types. +/// +/// Arrow implementations do not need to implement all of the message types, +/// which may include experimental metadata types. 
For maximum compatibility, +/// it is best to send data using RecordBatch +enum MessageHeader : uint8_t { + MessageHeader_NONE = 0, + MessageHeader_Schema = 1, + MessageHeader_MIN = MessageHeader_NONE, + MessageHeader_MAX = MessageHeader_Schema +}; + +inline const MessageHeader (&EnumValuesMessageHeader())[2] +{ + static const MessageHeader values[] = {MessageHeader_NONE, MessageHeader_Schema}; + return values; +} + +inline const char* const* EnumNamesMessageHeader() +{ + static const char* const names[3] = {"NONE", "Schema", nullptr}; + return names; +} + +inline const char* EnumNameMessageHeader(MessageHeader e) +{ + if (::flatbuffers::IsOutRange(e, MessageHeader_NONE, MessageHeader_Schema)) return ""; + const size_t index = static_cast(e); + return EnumNamesMessageHeader()[index]; +} + +template +struct MessageHeaderTraits { + static const MessageHeader enum_value = MessageHeader_NONE; +}; + +template <> +struct MessageHeaderTraits { + static const MessageHeader enum_value = MessageHeader_Schema; +}; + +bool VerifyMessageHeader(::flatbuffers::Verifier& verifier, const void* obj, MessageHeader type); +bool VerifyMessageHeaderVector(::flatbuffers::Verifier& verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset>* values, + const ::flatbuffers::Vector* types); + +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) +/// Metadata about a field at some level of a nested type tree (but not +/// its children). 
+/// +/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` +/// would have {length: 5, null_count: 2} for its List node, and {length: 6, +/// null_count: 0} for its Int16 node, as separate FieldNode structs +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) FieldNode FLATBUFFERS_FINAL_CLASS +{ + private: + int64_t length_; + int64_t null_count_; + + public: + FieldNode() : length_(0), null_count_(0) {} + FieldNode(int64_t _length, int64_t _null_count) + : length_(::flatbuffers::EndianScalar(_length)), + null_count_(::flatbuffers::EndianScalar(_null_count)) + { + } + /// The number of value slots in the Arrow array at this level of a nested + /// tree + int64_t length() const { return ::flatbuffers::EndianScalar(length_); } + /// The number of observed nulls. Fields with null_count == 0 may choose not + /// to write their physical validity bitmap out as a materialized buffer, + /// instead setting the length of the bitmap buffer to 0. + int64_t null_count() const { return ::flatbuffers::EndianScalar(null_count_); } +}; +FLATBUFFERS_STRUCT_END(FieldNode, 16); + +/// Optional compression for the memory buffers constituting IPC message +/// bodies. Intended for use with RecordBatch but could be used for other +/// message types +struct BodyCompression FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BodyCompressionBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_CODEC = 4, VT_METHOD = 6 }; + /// Compressor library. + /// For LZ4_FRAME, each compressed buffer must consist of a single frame. 
+ cudf::io::parquet::flatbuf::CompressionType codec() const + { + return static_cast(GetField(VT_CODEC, 0)); + } + /// Indicates the way the record batch body was compressed + cudf::io::parquet::flatbuf::BodyCompressionMethod method() const + { + return static_cast( + GetField(VT_METHOD, 0)); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_CODEC, 1) && + VerifyField(verifier, VT_METHOD, 1) && verifier.EndTable(); + } +}; + +struct BodyCompressionBuilder { + typedef BodyCompression Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_codec(cudf::io::parquet::flatbuf::CompressionType codec) + { + fbb_.AddElement(BodyCompression::VT_CODEC, static_cast(codec), 0); + } + void add_method(cudf::io::parquet::flatbuf::BodyCompressionMethod method) + { + fbb_.AddElement(BodyCompression::VT_METHOD, static_cast(method), 0); + } + explicit BodyCompressionBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBodyCompression( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::CompressionType codec = + cudf::io::parquet::flatbuf::CompressionType_LZ4_FRAME, + cudf::io::parquet::flatbuf::BodyCompressionMethod method = + cudf::io::parquet::flatbuf::BodyCompressionMethod_BUFFER) +{ + BodyCompressionBuilder builder_(_fbb); + builder_.add_method(method); + builder_.add_codec(codec); + return builder_.Finish(); +} + +/// A data header describing the shared memory layout of a "record" or "row" +/// batch. Some systems call this a "row batch" internally and others a "record +/// batch". 
+struct RecordBatch FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef RecordBatchBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_LENGTH = 4, + VT_NODES = 6, + VT_BUFFERS = 8, + VT_COMPRESSION = 10, + VT_VARIADICBUFFERCOUNTS = 12 + }; + /// number of records / rows. The arrays in the batch should all have this + /// length + int64_t length() const { return GetField(VT_LENGTH, 0); } + /// Nodes correspond to the pre-ordered flattened logical schema + const ::flatbuffers::Vector* nodes() const + { + return GetPointer*>( + VT_NODES); + } + /// Buffers correspond to the pre-ordered flattened buffer tree + /// + /// The number of buffers appended to this list depends on the schema. For + /// example, most primitive arrays will have 2 buffers, 1 for the validity + /// bitmap and 1 for the values. For struct arrays, there will only be a + /// single buffer for the validity (nulls) bitmap + const ::flatbuffers::Vector* buffers() const + { + return GetPointer*>( + VT_BUFFERS); + } + /// Optional compression of the message body + const cudf::io::parquet::flatbuf::BodyCompression* compression() const + { + return GetPointer(VT_COMPRESSION); + } + /// Some types such as Utf8View are represented using a variable number of buffers. + /// For each such Field in the pre-ordered flattened logical schema, there will be + /// an entry in variadicBufferCounts to indicate the number of variadic + /// buffers which belong to that Field in the current RecordBatch. + /// + /// For example, the schema + /// col1: Struct<alpha: Int32, beta: BinaryView, gamma: Float64> + /// col2: Utf8View + /// contains two Fields with variadic buffers so variadicBufferCounts will have + /// two entries, the first counting the variadic buffers of `col1.beta` and the + /// second counting `col2`'s. + /// + /// This field may be omitted if and only if the schema contains no Fields with + /// a variable number of buffers, such as BinaryView and Utf8View. 
+ const ::flatbuffers::Vector* variadicBufferCounts() const + { + return GetPointer*>(VT_VARIADICBUFFERCOUNTS); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_LENGTH, 8) && + VerifyOffset(verifier, VT_NODES) && verifier.VerifyVector(nodes()) && + VerifyOffset(verifier, VT_BUFFERS) && verifier.VerifyVector(buffers()) && + VerifyOffset(verifier, VT_COMPRESSION) && verifier.VerifyTable(compression()) && + VerifyOffset(verifier, VT_VARIADICBUFFERCOUNTS) && + verifier.VerifyVector(variadicBufferCounts()) && verifier.EndTable(); + } +}; + +struct RecordBatchBuilder { + typedef RecordBatch Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_length(int64_t length) { fbb_.AddElement(RecordBatch::VT_LENGTH, length, 0); } + void add_nodes( + ::flatbuffers::Offset<::flatbuffers::Vector> + nodes) + { + fbb_.AddOffset(RecordBatch::VT_NODES, nodes); + } + void add_buffers( + ::flatbuffers::Offset<::flatbuffers::Vector> buffers) + { + fbb_.AddOffset(RecordBatch::VT_BUFFERS, buffers); + } + void add_compression( + ::flatbuffers::Offset compression) + { + fbb_.AddOffset(RecordBatch::VT_COMPRESSION, compression); + } + void add_variadicBufferCounts( + ::flatbuffers::Offset<::flatbuffers::Vector> variadicBufferCounts) + { + fbb_.AddOffset(RecordBatch::VT_VARIADICBUFFERCOUNTS, variadicBufferCounts); + } + explicit RecordBatchBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateRecordBatch( + ::flatbuffers::FlatBufferBuilder& _fbb, + int64_t length = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> nodes = + 0, + ::flatbuffers::Offset<::flatbuffers::Vector> buffers = + 0, + ::flatbuffers::Offset compression = 0, + 
::flatbuffers::Offset<::flatbuffers::Vector> variadicBufferCounts = 0) +{ + RecordBatchBuilder builder_(_fbb); + builder_.add_length(length); + builder_.add_variadicBufferCounts(variadicBufferCounts); + builder_.add_compression(compression); + builder_.add_buffers(buffers); + builder_.add_nodes(nodes); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateRecordBatchDirect( + ::flatbuffers::FlatBufferBuilder& _fbb, + int64_t length = 0, + const std::vector* nodes = nullptr, + const std::vector* buffers = nullptr, + ::flatbuffers::Offset compression = 0, + const std::vector* variadicBufferCounts = nullptr) +{ + auto nodes__ = + nodes ? _fbb.CreateVectorOfStructs(*nodes) : 0; + auto buffers__ = + buffers ? _fbb.CreateVectorOfStructs(*buffers) : 0; + auto variadicBufferCounts__ = + variadicBufferCounts ? _fbb.CreateVector(*variadicBufferCounts) : 0; + return cudf::io::parquet::flatbuf::CreateRecordBatch( + _fbb, length, nodes__, buffers__, compression, variadicBufferCounts__); +} + +/// For sending dictionary encoding information. Any Field can be +/// dictionary-encoded, but in this case none of its children may be +/// dictionary-encoded. +/// There is one vector / column per dictionary, but that vector / column +/// may be spread across multiple dictionary batches by using the isDelta +/// flag +struct DictionaryBatch FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DictionaryBatchBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ID = 4, + VT_DATA = 6, + VT_ISDELTA = 8 + }; + int64_t id() const { return GetField(VT_ID, 0); } + const cudf::io::parquet::flatbuf::RecordBatch* data() const + { + return GetPointer(VT_DATA); + } + /// If isDelta is true the values in the dictionary are to be appended to a + /// dictionary with the indicated id. If isDelta is false this dictionary + /// should replace the existing dictionary. 
+ bool isDelta() const { return GetField(VT_ISDELTA, 0) != 0; } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_ID, 8) && + VerifyOffset(verifier, VT_DATA) && verifier.VerifyTable(data()) && + VerifyField(verifier, VT_ISDELTA, 1) && verifier.EndTable(); + } +}; + +struct DictionaryBatchBuilder { + typedef DictionaryBatch Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_id(int64_t id) { fbb_.AddElement(DictionaryBatch::VT_ID, id, 0); } + void add_data(::flatbuffers::Offset data) + { + fbb_.AddOffset(DictionaryBatch::VT_DATA, data); + } + void add_isDelta(bool isDelta) + { + fbb_.AddElement(DictionaryBatch::VT_ISDELTA, static_cast(isDelta), 0); + } + explicit DictionaryBatchBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDictionaryBatch( + ::flatbuffers::FlatBufferBuilder& _fbb, + int64_t id = 0, + ::flatbuffers::Offset data = 0, + bool isDelta = false) +{ + DictionaryBatchBuilder builder_(_fbb); + builder_.add_id(id); + builder_.add_data(data); + builder_.add_isDelta(isDelta); + return builder_.Finish(); +} + +struct Message FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MessageBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VERSION = 4, + VT_HEADER_TYPE = 6, + VT_HEADER = 8, + VT_BODYLENGTH = 10, + VT_CUSTOM_METADATA = 12 + }; + cudf::io::parquet::flatbuf::MetadataVersion version() const + { + return static_cast( + GetField(VT_VERSION, 0)); + } + cudf::io::parquet::flatbuf::MessageHeader header_type() const + { + return static_cast( + GetField(VT_HEADER_TYPE, 0)); + } + const void* header() const { return GetPointer(VT_HEADER); } + template + const T* header_as() 
const; + const cudf::io::parquet::flatbuf::Schema* header_as_Schema() const + { + return header_type() == cudf::io::parquet::flatbuf::MessageHeader_Schema + ? static_cast(header()) + : nullptr; + } + int64_t bodyLength() const { return GetField(VT_BODYLENGTH, 0); } + const ::flatbuffers::Vector<::flatbuffers::Offset>* + custom_metadata() const + { + return GetPointer< + const ::flatbuffers::Vector<::flatbuffers::Offset>*>( + VT_CUSTOM_METADATA); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_VERSION, 2) && + VerifyField(verifier, VT_HEADER_TYPE, 1) && VerifyOffset(verifier, VT_HEADER) && + VerifyMessageHeader(verifier, header(), header_type()) && + VerifyField(verifier, VT_BODYLENGTH, 8) && + VerifyOffset(verifier, VT_CUSTOM_METADATA) && verifier.VerifyVector(custom_metadata()) && + verifier.VerifyVectorOfTables(custom_metadata()) && verifier.EndTable(); + } +}; + +template <> +inline const cudf::io::parquet::flatbuf::Schema* +Message::header_as() const +{ + return header_as_Schema(); +} + +struct MessageBuilder { + typedef Message Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_version(cudf::io::parquet::flatbuf::MetadataVersion version) + { + fbb_.AddElement(Message::VT_VERSION, static_cast(version), 0); + } + void add_header_type(cudf::io::parquet::flatbuf::MessageHeader header_type) + { + fbb_.AddElement(Message::VT_HEADER_TYPE, static_cast(header_type), 0); + } + void add_header(::flatbuffers::Offset header) + { + fbb_.AddOffset(Message::VT_HEADER, header); + } + void add_bodyLength(int64_t bodyLength) + { + fbb_.AddElement(Message::VT_BODYLENGTH, bodyLength, 0); + } + void add_custom_metadata( + ::flatbuffers::Offset< + ::flatbuffers::Vector<::flatbuffers::Offset>> + custom_metadata) + { + fbb_.AddOffset(Message::VT_CUSTOM_METADATA, custom_metadata); + } + explicit MessageBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + 
{ + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMessage( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::MetadataVersion version = + cudf::io::parquet::flatbuf::MetadataVersion_V1, + cudf::io::parquet::flatbuf::MessageHeader header_type = + cudf::io::parquet::flatbuf::MessageHeader_NONE, + ::flatbuffers::Offset header = 0, + int64_t bodyLength = 0, + ::flatbuffers::Offset<::flatbuffers::Vector< + ::flatbuffers::Offset>> custom_metadata = 0) +{ + MessageBuilder builder_(_fbb); + builder_.add_bodyLength(bodyLength); + builder_.add_custom_metadata(custom_metadata); + builder_.add_header(header); + builder_.add_version(version); + builder_.add_header_type(header_type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateMessageDirect( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::MetadataVersion version = + cudf::io::parquet::flatbuf::MetadataVersion_V1, + cudf::io::parquet::flatbuf::MessageHeader header_type = + cudf::io::parquet::flatbuf::MessageHeader_NONE, + ::flatbuffers::Offset header = 0, + int64_t bodyLength = 0, + const std::vector<::flatbuffers::Offset>* custom_metadata = + nullptr) +{ + auto custom_metadata__ = + custom_metadata + ? 
_fbb.CreateVector<::flatbuffers::Offset>( + *custom_metadata) + : 0; + return cudf::io::parquet::flatbuf::CreateMessage( + _fbb, version, header_type, header, bodyLength, custom_metadata__); +} + +inline bool VerifyMessageHeader(::flatbuffers::Verifier& verifier, + const void* obj, + MessageHeader type) +{ + switch (type) { + case MessageHeader_NONE: { + return true; + } + case MessageHeader_Schema: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return true; + } +} + +inline bool VerifyMessageHeaderVector( + ::flatbuffers::Verifier& verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset>* values, + const ::flatbuffers::Vector* types) +{ + if (!values || !types) return !values && !types; + if (values->size() != types->size()) return false; + for (::flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifyMessageHeader(verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline const cudf::io::parquet::flatbuf::Message* GetMessage(const void* buf) +{ + return ::flatbuffers::GetRoot(buf); +} + +inline const cudf::io::parquet::flatbuf::Message* GetSizePrefixedMessage(const void* buf) +{ + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline bool VerifyMessageBuffer(::flatbuffers::Verifier& verifier) +{ + return verifier.VerifyBuffer(nullptr); +} + +inline bool VerifySizePrefixedMessageBuffer(::flatbuffers::Verifier& verifier) +{ + return verifier.VerifySizePrefixedBuffer(nullptr); +} + +inline void FinishMessageBuffer(::flatbuffers::FlatBufferBuilder& fbb, + ::flatbuffers::Offset root) +{ + fbb.Finish(root); +} + +inline void FinishSizePrefixedMessageBuffer( + ::flatbuffers::FlatBufferBuilder& fbb, + ::flatbuffers::Offset root) +{ + fbb.FinishSizePrefixed(root); +} + +} // namespace flatbuf +} // namespace parquet +} // namespace io +} // namespace cudf + +#endif // FLATBUFFERS_GENERATED_MESSAGE_CUDF_IO_PARQUET_FLATBUF_H_ diff --git 
a/cpp/src/io/parquet/ipc/Schema_generated.h b/cpp/src/io/parquet/ipc/Schema_generated.h new file mode 100644 index 00000000000..27141b4af31 --- /dev/null +++ b/cpp/src/io/parquet/ipc/Schema_generated.h @@ -0,0 +1,2769 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_SCHEMA_CUDF_IO_PARQUET_FLATBUF_H_ +#define FLATBUFFERS_GENERATED_SCHEMA_CUDF_IO_PARQUET_FLATBUF_H_ + +#include + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. +static_assert(FLATBUFFERS_VERSION_MAJOR == 24 && FLATBUFFERS_VERSION_MINOR == 3 && + FLATBUFFERS_VERSION_REVISION == 25, + "Non-compatible flatbuffers version included"); + +namespace cudf { +namespace io { +namespace parquet { +namespace flatbuf { + +struct Null; +struct NullBuilder; + +struct Struct_; +struct Struct_Builder; + +struct List; +struct ListBuilder; + +struct LargeList; +struct LargeListBuilder; + +struct ListView; +struct ListViewBuilder; + +struct LargeListView; +struct LargeListViewBuilder; + +struct FixedSizeList; +struct FixedSizeListBuilder; + +struct Map; +struct MapBuilder; + +struct Union; +struct UnionBuilder; + +struct Int; +struct IntBuilder; + +struct FloatingPoint; +struct FloatingPointBuilder; + +struct Utf8; +struct Utf8Builder; + +struct Binary; +struct BinaryBuilder; + +struct LargeUtf8; +struct LargeUtf8Builder; + +struct LargeBinary; +struct LargeBinaryBuilder; + +struct Utf8View; +struct Utf8ViewBuilder; + +struct BinaryView; +struct BinaryViewBuilder; + +struct FixedSizeBinary; +struct FixedSizeBinaryBuilder; + +struct Bool; +struct BoolBuilder; + +struct RunEndEncoded; +struct RunEndEncodedBuilder; + +struct Decimal; +struct DecimalBuilder; + +struct Date; +struct DateBuilder; + +struct Time; +struct TimeBuilder; + +struct Timestamp; +struct TimestampBuilder; + +struct Interval; +struct IntervalBuilder; + +struct Duration; +struct DurationBuilder; + +struct KeyValue; 
+struct KeyValueBuilder; + +struct DictionaryEncoding; +struct DictionaryEncodingBuilder; + +struct Field; +struct FieldBuilder; + +struct Buffer; + +struct Schema; +struct SchemaBuilder; + +enum MetadataVersion : int16_t { + /// 0.1.0 (October 2016). + MetadataVersion_V1 = 0, + /// 0.2.0 (February 2017). Non-backwards compatible with V1. + MetadataVersion_V2 = 1, + /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. + MetadataVersion_V3 = 2, + /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. + MetadataVersion_V4 = 3, + /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 + /// metadata and IPC messages). Implementations are recommended to provide a + /// V4 compatibility mode with V5 format changes disabled. + /// + /// Incompatible changes between V4 and V5: + /// - Union buffer layout has changed. In V5, Unions don't have a validity + /// bitmap buffer. + MetadataVersion_V5 = 4, + MetadataVersion_MIN = MetadataVersion_V1, + MetadataVersion_MAX = MetadataVersion_V5 +}; + +inline const MetadataVersion (&EnumValuesMetadataVersion())[5] +{ + static const MetadataVersion values[] = {MetadataVersion_V1, + MetadataVersion_V2, + MetadataVersion_V3, + MetadataVersion_V4, + MetadataVersion_V5}; + return values; +} + +inline const char* const* EnumNamesMetadataVersion() +{ + static const char* const names[6] = {"V1", "V2", "V3", "V4", "V5", nullptr}; + return names; +} + +inline const char* EnumNameMetadataVersion(MetadataVersion e) +{ + if (::flatbuffers::IsOutRange(e, MetadataVersion_V1, MetadataVersion_V5)) return ""; + const size_t index = static_cast(e); + return EnumNamesMetadataVersion()[index]; +} + +/// Represents Arrow Features that might not have full support +/// within implementations. This is intended to be used in +/// two scenarios: +/// 1. 
A mechanism for readers of Arrow Streams +/// and files to understand that the stream or file makes +/// use of a feature that isn't supported or unknown to +/// the implementation (and therefore can meet the Arrow +/// forward compatibility guarantees). +/// 2. A means of negotiating between a client and server +/// what features a stream is allowed to use. The enum +/// values here are intended to represent higher level +/// features, additional details may be negotiated +/// with key-value pairs specific to the protocol. +/// +/// Enums added to this list should be assigned power-of-two values +/// to facilitate exchanging and comparing bitmaps for supported +/// features. +enum Feature : int64_t { + /// Needed to make flatbuffers happy. + Feature_UNUSED = 0, + /// The stream makes use of multiple full dictionaries with the + /// same ID and assumes clients implement dictionary replacement + /// correctly. + Feature_DICTIONARY_REPLACEMENT = 1LL, + /// The stream makes use of compressed bodies as described + /// in Message.fbs. 
+ Feature_COMPRESSED_BODY = 2LL, + Feature_MIN = Feature_UNUSED, + Feature_MAX = Feature_COMPRESSED_BODY +}; + +inline const Feature (&EnumValuesFeature())[3] +{ + static const Feature values[] = { + Feature_UNUSED, Feature_DICTIONARY_REPLACEMENT, Feature_COMPRESSED_BODY}; + return values; +} + +inline const char* const* EnumNamesFeature() +{ + static const char* const names[4] = { + "UNUSED", "DICTIONARY_REPLACEMENT", "COMPRESSED_BODY", nullptr}; + return names; +} + +inline const char* EnumNameFeature(Feature e) +{ + if (::flatbuffers::IsOutRange(e, Feature_UNUSED, Feature_COMPRESSED_BODY)) return ""; + const size_t index = static_cast(e); + return EnumNamesFeature()[index]; +} + +enum UnionMode : int16_t { + UnionMode_Sparse = 0, + UnionMode_Dense = 1, + UnionMode_MIN = UnionMode_Sparse, + UnionMode_MAX = UnionMode_Dense +}; + +inline const UnionMode (&EnumValuesUnionMode())[2] +{ + static const UnionMode values[] = {UnionMode_Sparse, UnionMode_Dense}; + return values; +} + +inline const char* const* EnumNamesUnionMode() +{ + static const char* const names[3] = {"Sparse", "Dense", nullptr}; + return names; +} + +inline const char* EnumNameUnionMode(UnionMode e) +{ + if (::flatbuffers::IsOutRange(e, UnionMode_Sparse, UnionMode_Dense)) return ""; + const size_t index = static_cast(e); + return EnumNamesUnionMode()[index]; +} + +enum Precision : int16_t { + Precision_HALF = 0, + Precision_SINGLE = 1, + Precision_DOUBLE = 2, + Precision_MIN = Precision_HALF, + Precision_MAX = Precision_DOUBLE +}; + +inline const Precision (&EnumValuesPrecision())[3] +{ + static const Precision values[] = {Precision_HALF, Precision_SINGLE, Precision_DOUBLE}; + return values; +} + +inline const char* const* EnumNamesPrecision() +{ + static const char* const names[4] = {"HALF", "SINGLE", "DOUBLE", nullptr}; + return names; +} + +inline const char* EnumNamePrecision(Precision e) +{ + if (::flatbuffers::IsOutRange(e, Precision_HALF, Precision_DOUBLE)) return ""; + const size_t index = 
static_cast(e); + return EnumNamesPrecision()[index]; +} + +enum DateUnit : int16_t { + DateUnit_DAY = 0, + DateUnit_MILLISECOND = 1, + DateUnit_MIN = DateUnit_DAY, + DateUnit_MAX = DateUnit_MILLISECOND +}; + +inline const DateUnit (&EnumValuesDateUnit())[2] +{ + static const DateUnit values[] = {DateUnit_DAY, DateUnit_MILLISECOND}; + return values; +} + +inline const char* const* EnumNamesDateUnit() +{ + static const char* const names[3] = {"DAY", "MILLISECOND", nullptr}; + return names; +} + +inline const char* EnumNameDateUnit(DateUnit e) +{ + if (::flatbuffers::IsOutRange(e, DateUnit_DAY, DateUnit_MILLISECOND)) return ""; + const size_t index = static_cast(e); + return EnumNamesDateUnit()[index]; +} + +enum TimeUnit : int16_t { + TimeUnit_SECOND = 0, + TimeUnit_MILLISECOND = 1, + TimeUnit_MICROSECOND = 2, + TimeUnit_NANOSECOND = 3, + TimeUnit_MIN = TimeUnit_SECOND, + TimeUnit_MAX = TimeUnit_NANOSECOND +}; + +inline const TimeUnit (&EnumValuesTimeUnit())[4] +{ + static const TimeUnit values[] = { + TimeUnit_SECOND, TimeUnit_MILLISECOND, TimeUnit_MICROSECOND, TimeUnit_NANOSECOND}; + return values; +} + +inline const char* const* EnumNamesTimeUnit() +{ + static const char* const names[5] = { + "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND", nullptr}; + return names; +} + +inline const char* EnumNameTimeUnit(TimeUnit e) +{ + if (::flatbuffers::IsOutRange(e, TimeUnit_SECOND, TimeUnit_NANOSECOND)) return ""; + const size_t index = static_cast(e); + return EnumNamesTimeUnit()[index]; +} + +enum IntervalUnit : int16_t { + IntervalUnit_YEAR_MONTH = 0, + IntervalUnit_DAY_TIME = 1, + IntervalUnit_MONTH_DAY_NANO = 2, + IntervalUnit_MIN = IntervalUnit_YEAR_MONTH, + IntervalUnit_MAX = IntervalUnit_MONTH_DAY_NANO +}; + +inline const IntervalUnit (&EnumValuesIntervalUnit())[3] +{ + static const IntervalUnit values[] = { + IntervalUnit_YEAR_MONTH, IntervalUnit_DAY_TIME, IntervalUnit_MONTH_DAY_NANO}; + return values; +} + +inline const char* const* EnumNamesIntervalUnit() 
+{ + static const char* const names[4] = {"YEAR_MONTH", "DAY_TIME", "MONTH_DAY_NANO", nullptr}; + return names; +} + +inline const char* EnumNameIntervalUnit(IntervalUnit e) +{ + if (::flatbuffers::IsOutRange(e, IntervalUnit_YEAR_MONTH, IntervalUnit_MONTH_DAY_NANO)) return ""; + const size_t index = static_cast(e); + return EnumNamesIntervalUnit()[index]; +} + +/// ---------------------------------------------------------------------- +/// Top-level Type value, enabling extensible type-specific metadata. We can +/// add new logical types to Type without breaking backwards compatibility +enum Type : uint8_t { + Type_NONE = 0, + Type_Null = 1, + Type_Int = 2, + Type_FloatingPoint = 3, + Type_Binary = 4, + Type_Utf8 = 5, + Type_Bool = 6, + Type_Decimal = 7, + Type_Date = 8, + Type_Time = 9, + Type_Timestamp = 10, + Type_Interval = 11, + Type_List = 12, + Type_Struct_ = 13, + Type_Union = 14, + Type_FixedSizeBinary = 15, + Type_FixedSizeList = 16, + Type_Map = 17, + Type_Duration = 18, + Type_LargeBinary = 19, + Type_LargeUtf8 = 20, + Type_LargeList = 21, + Type_RunEndEncoded = 22, + Type_BinaryView = 23, + Type_Utf8View = 24, + Type_ListView = 25, + Type_LargeListView = 26, + Type_MIN = Type_NONE, + Type_MAX = Type_LargeListView +}; + +inline const Type (&EnumValuesType())[27] +{ + static const Type values[] = { + Type_NONE, Type_Null, Type_Int, Type_FloatingPoint, + Type_Binary, Type_Utf8, Type_Bool, Type_Decimal, + Type_Date, Type_Time, Type_Timestamp, Type_Interval, + Type_List, Type_Struct_, Type_Union, Type_FixedSizeBinary, + Type_FixedSizeList, Type_Map, Type_Duration, Type_LargeBinary, + Type_LargeUtf8, Type_LargeList, Type_RunEndEncoded, Type_BinaryView, + Type_Utf8View, Type_ListView, Type_LargeListView}; + return values; +} + +inline const char* const* EnumNamesType() +{ + static const char* const names[28] = { + "NONE", "Null", "Int", "FloatingPoint", + "Binary", "Utf8", "Bool", "Decimal", + "Date", "Time", "Timestamp", "Interval", + "List", "Struct_", 
"Union", "FixedSizeBinary", + "FixedSizeList", "Map", "Duration", "LargeBinary", + "LargeUtf8", "LargeList", "RunEndEncoded", "BinaryView", + "Utf8View", "ListView", "LargeListView", nullptr}; + return names; +} + +inline const char* EnumNameType(Type e) +{ + if (::flatbuffers::IsOutRange(e, Type_NONE, Type_LargeListView)) return ""; + const size_t index = static_cast(e); + return EnumNamesType()[index]; +} + +template +struct TypeTraits { + static const Type enum_value = Type_NONE; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Null; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Int; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_FloatingPoint; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Binary; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Utf8; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Bool; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Decimal; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Date; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Time; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Timestamp; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Interval; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_List; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Struct_; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Union; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_FixedSizeBinary; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_FixedSizeList; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Map; +}; + +template <> +struct TypeTraits 
{ + static const Type enum_value = Type_Duration; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_LargeBinary; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_LargeUtf8; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_LargeList; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_RunEndEncoded; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_BinaryView; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Utf8View; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_ListView; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_LargeListView; +}; + +bool VerifyType(::flatbuffers::Verifier& verifier, const void* obj, Type type); +bool VerifyTypeVector(::flatbuffers::Verifier& verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset>* values, + const ::flatbuffers::Vector* types); + +/// ---------------------------------------------------------------------- +/// Dictionary encoding metadata +/// Maintained for forwards compatibility, in the future +/// Dictionaries might be explicit maps between integers and values +/// allowing for non-contiguous index values +enum DictionaryKind : int16_t { + DictionaryKind_DenseArray = 0, + DictionaryKind_MIN = DictionaryKind_DenseArray, + DictionaryKind_MAX = DictionaryKind_DenseArray +}; + +inline const DictionaryKind (&EnumValuesDictionaryKind())[1] +{ + static const DictionaryKind values[] = {DictionaryKind_DenseArray}; + return values; +} + +inline const char* const* EnumNamesDictionaryKind() +{ + static const char* const names[2] = {"DenseArray", nullptr}; + return names; +} + +inline const char* EnumNameDictionaryKind(DictionaryKind e) +{ + if (::flatbuffers::IsOutRange(e, DictionaryKind_DenseArray, DictionaryKind_DenseArray)) return ""; + const size_t index = static_cast(e); + return 
EnumNamesDictionaryKind()[index]; +} + +/// ---------------------------------------------------------------------- +/// Endianness of the platform producing the data +enum Endianness : int16_t { + Endianness_Little = 0, + Endianness_Big = 1, + Endianness_MIN = Endianness_Little, + Endianness_MAX = Endianness_Big +}; + +inline const Endianness (&EnumValuesEndianness())[2] +{ + static const Endianness values[] = {Endianness_Little, Endianness_Big}; + return values; +} + +inline const char* const* EnumNamesEndianness() +{ + static const char* const names[3] = {"Little", "Big", nullptr}; + return names; +} + +inline const char* EnumNameEndianness(Endianness e) +{ + if (::flatbuffers::IsOutRange(e, Endianness_Little, Endianness_Big)) return ""; + const size_t index = static_cast(e); + return EnumNamesEndianness()[index]; +} + +/// ---------------------------------------------------------------------- +/// A Buffer represents a single contiguous memory segment +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Buffer FLATBUFFERS_FINAL_CLASS +{ + private: + int64_t offset_; + int64_t length_; + + public: + Buffer() : offset_(0), length_(0) {} + Buffer(int64_t _offset, int64_t _length) + : offset_(::flatbuffers::EndianScalar(_offset)), length_(::flatbuffers::EndianScalar(_length)) + { + } + /// The relative offset into the shared memory page where the bytes for this + /// buffer starts + int64_t offset() const { return ::flatbuffers::EndianScalar(offset_); } + /// The absolute length (in bytes) of the memory buffer. The memory is found + /// from offset (inclusive) to offset + length (non-inclusive). When building + /// messages using the encapsulated IPC message, padding bytes may be written + /// after a buffer, but such padding bytes do not need to be accounted for in + /// the size here. 
+ int64_t length() const { return ::flatbuffers::EndianScalar(length_); } +}; +FLATBUFFERS_STRUCT_END(Buffer, 16); + +/// These are stored in the flatbuffer in the Type union below +struct Null FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef NullBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct NullBuilder { + typedef Null Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit NullBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateNull(::flatbuffers::FlatBufferBuilder& _fbb) +{ + NullBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +/// (according to the physical memory layout). 
We used Struct_ here as
+/// Struct is a reserved word in Flatbuffers
+struct Struct_ FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef Struct_Builder Builder;
+  /// Runs the verifier over the table start and end; Struct_ declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for Struct_ tables: the constructor starts the table, Finish() ends it.
+struct Struct_Builder {
+  typedef Struct_ Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit Struct_Builder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<Struct_> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<Struct_>(end);
+    return o;
+  }
+};
+
+/// Writes a Struct_ table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<Struct_> CreateStruct_(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  Struct_Builder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct List FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef ListBuilder Builder;
+  /// Runs the verifier over the table start and end; List declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for List tables: the constructor starts the table, Finish() ends it.
+struct ListBuilder {
+  typedef List Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit ListBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<List> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<List>(end);
+    return o;
+  }
+};
+
+/// Writes a List table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<List> CreateList(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  ListBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Same as List, but with 64-bit offsets, allowing to represent
+/// extremely large data values.
+struct LargeList FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef LargeListBuilder Builder;
+  /// Runs the verifier over the table start and end; LargeList declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for LargeList tables: the constructor starts the table, Finish() ends it.
+struct LargeListBuilder {
+  typedef LargeList Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit LargeListBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<LargeList> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<LargeList>(end);
+    return o;
+  }
+};
+
+/// Writes a LargeList table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<LargeList> CreateLargeList(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  LargeListBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Represents the same logical types that List can, but contains offsets and
+/// sizes allowing for writes in any order and sharing of child values among
+/// list values.
+struct ListView FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef ListViewBuilder Builder;
+  /// Runs the verifier over the table start and end; ListView declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for ListView tables: the constructor starts the table, Finish() ends it.
+struct ListViewBuilder {
+  typedef ListView Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit ListViewBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<ListView> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<ListView>(end);
+    return o;
+  }
+};
+
+/// Writes a ListView table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<ListView> CreateListView(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  ListViewBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent
+/// extremely large data values.
+struct LargeListView FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef LargeListViewBuilder Builder;
+  /// Runs the verifier over the table start and end; LargeListView declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for LargeListView tables: the constructor starts the table, Finish() ends it.
+struct LargeListViewBuilder {
+  typedef LargeListView Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit LargeListViewBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<LargeListView> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<LargeListView>(end);
+    return o;
+  }
+};
+
+/// Writes a LargeListView table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<LargeListView> CreateLargeListView(
+  ::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  LargeListViewBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct FixedSizeList FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef FixedSizeListBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_LISTSIZE = 4 };
+  /// Number of list items per value
+  int32_t listSize() const { return GetField<int32_t>(VT_LISTSIZE, 0); }
+  /// Runs the verifier over the table start, the listSize field and the table end.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_LISTSIZE, 4) &&
+           verifier.EndTable();
+  }
+};
+
+/// Builder for FixedSizeList tables; call add_listSize() before Finish().
+struct FixedSizeListBuilder {
+  typedef FixedSizeList Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  /// Adds the listSize field; 0 is passed to AddElement as the field's default.
+  void add_listSize(int32_t listSize)
+  {
+    fbb_.AddElement<int32_t>(FixedSizeList::VT_LISTSIZE, listSize, 0);
+  }
+  explicit FixedSizeListBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<FixedSizeList> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<FixedSizeList>(end);
+    return o;
+  }
+};
+
+/// Writes a FixedSizeList table with the given `listSize` into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<FixedSizeList> CreateFixedSizeList(
+  ::flatbuffers::FlatBufferBuilder& _fbb, int32_t listSize = 0)
+{
+  FixedSizeListBuilder builder_(_fbb);
+  builder_.add_listSize(listSize);
+  return builder_.Finish();
+}
+
+/// A Map is a logical nested type that is represented as
+///
+/// List<entries: Struct<key: K, value: V>>
+///
+/// In this layout, the keys and values are each respectively contiguous. We do
+/// not constrain the key and value types, so the application is responsible
+/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
+/// may be set in the metadata for this field.
+///
+/// In a field with Map type, the field has a child Struct field, which then
+/// has two children: key type and the second the value type. The names of the
+/// child fields may be respectively "entries", "key", and "value", but this is
+/// not enforced.
+///
+/// Map
+/// ```text
+///   - child[0] entries: Struct
+///     - child[0] key: K
+///     - child[1] value: V
+/// ```
+/// Neither the "entries" field nor the "key" field may be nullable.
+///
+/// The metadata is structured so that Arrow systems without special handling
+/// for Map can make Map an alias for List. The "layout" attribute for the Map
+/// field must have the same contents as a List.
+struct Map FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef MapBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_KEYSSORTED = 4 };
+  /// Set to true if the keys within each value are sorted
+  bool keysSorted() const { return GetField<uint8_t>(VT_KEYSSORTED, 0) != 0; }
+  /// Runs the verifier over the table start, the keysSorted field and the table end.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_KEYSSORTED, 1) &&
+           verifier.EndTable();
+  }
+};
+
+/// Builder for Map tables; call add_keysSorted() before Finish().
+struct MapBuilder {
+  typedef Map Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  /// Adds the keysSorted flag, stored as a uint8_t with default 0.
+  void add_keysSorted(bool keysSorted)
+  {
+    fbb_.AddElement<uint8_t>(Map::VT_KEYSSORTED, static_cast<uint8_t>(keysSorted), 0);
+  }
+  explicit MapBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<Map> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<Map>(end);
+    return o;
+  }
+};
+
+/// Writes a Map table with the given `keysSorted` flag into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<Map> CreateMap(::flatbuffers::FlatBufferBuilder& _fbb,
+                                            bool keysSorted = false)
+{
+  MapBuilder builder_(_fbb);
+  builder_.add_keysSorted(keysSorted);
+  return builder_.Finish();
+}
+
+/// A union is a complex type with children in Field
+/// By default ids in the type vector refer to the offsets in the children
+/// optionally typeIds provides an indirection between the child offset and the type id
+/// for each child `typeIds[offset]` is the id used in the type vector
+struct Union FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef UnionBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_MODE = 4, VT_TYPEIDS = 6 };
+  /// Union mode; reads the int16_t field and falls back to 0 (UnionMode_Sparse) when absent.
+  cudf::io::parquet::flatbuf::UnionMode mode() const
+  {
+    return static_cast<cudf::io::parquet::flatbuf::UnionMode>(GetField<int16_t>(VT_MODE, 0));
+  }
+  /// Pointer to the typeIds vector stored in the table.
+  const ::flatbuffers::Vector<int32_t>* typeIds() const
+  {
+    return GetPointer<const ::flatbuffers::Vector<int32_t>*>(VT_TYPEIDS);
+  }
+  /// Runs the verifier over the table start, the mode field, the typeIds offset and
+  /// vector, and the table end.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int16_t>(verifier, VT_MODE, 2) &&
+           VerifyOffset(verifier, VT_TYPEIDS) && verifier.VerifyVector(typeIds()) &&
+           verifier.EndTable();
+  }
+};
+
+/// Builder for Union tables; add the fields, then call Finish().
+struct UnionBuilder {
+  typedef Union Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  /// Adds the mode field, stored as an int16_t with default 0.
+  void add_mode(cudf::io::parquet::flatbuf::UnionMode mode)
+  {
+    fbb_.AddElement<int16_t>(Union::VT_MODE, static_cast<int16_t>(mode), 0);
+  }
+  /// Adds the offset of an already-serialized typeIds vector.
+  void add_typeIds(::flatbuffers::Offset<::flatbuffers::Vector<int32_t>> typeIds)
+  {
+    fbb_.AddOffset(Union::VT_TYPEIDS, typeIds);
+  }
+  explicit UnionBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<Union> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<Union>(end);
+    return o;
+  }
+};
+
+/// Writes a Union table into `_fbb` from a mode and a pre-built typeIds vector offset.
+inline ::flatbuffers::Offset<Union> CreateUnion(
+  ::flatbuffers::FlatBufferBuilder& _fbb,
+  cudf::io::parquet::flatbuf::UnionMode mode = cudf::io::parquet::flatbuf::UnionMode_Sparse,
+  ::flatbuffers::Offset<::flatbuffers::Vector<int32_t>> typeIds = 0)
+{
+  UnionBuilder builder_(_fbb);
+  builder_.add_typeIds(typeIds);
+  builder_.add_mode(mode);
+  return builder_.Finish();
+}
+
+/// Like CreateUnion, but serializes `typeIds` from a std::vector first; a null pointer
+/// results in a zero offset being passed on.
+inline ::flatbuffers::Offset<Union> CreateUnionDirect(
+  ::flatbuffers::FlatBufferBuilder& _fbb,
+  cudf::io::parquet::flatbuf::UnionMode mode = cudf::io::parquet::flatbuf::UnionMode_Sparse,
+  const std::vector<int32_t>* typeIds = nullptr)
+{
+  auto typeIds__ = typeIds ? _fbb.CreateVector<int32_t>(*typeIds) : 0;
+  return cudf::io::parquet::flatbuf::CreateUnion(_fbb, mode, typeIds__);
+}
+
+struct Int FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef IntBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_BITWIDTH = 4,
+    VT_IS_SIGNED = 6
+  };
+  /// Reads the bitWidth field; 0 when the field is absent.
+  int32_t bitWidth() const { return GetField<int32_t>(VT_BITWIDTH, 0); }
+  /// Reads the is_signed flag; false when the field is absent.
+  bool is_signed() const { return GetField<uint8_t>(VT_IS_SIGNED, 0) != 0; }
+  /// Runs the verifier over the table start, both fields and the table end.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BITWIDTH, 4) &&
+           VerifyField<uint8_t>(verifier, VT_IS_SIGNED, 1) && verifier.EndTable();
+  }
+};
+
+/// Builder for Int tables; add the fields, then call Finish().
+struct IntBuilder {
+  typedef Int Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  void add_bitWidth(int32_t bitWidth) { fbb_.AddElement<int32_t>(Int::VT_BITWIDTH, bitWidth, 0); }
+  void add_is_signed(bool is_signed)
+  {
+    fbb_.AddElement<uint8_t>(Int::VT_IS_SIGNED, static_cast<uint8_t>(is_signed), 0);
+  }
+  explicit IntBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<Int> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<Int>(end);
+    return o;
+  }
+};
+
+/// Writes an Int table with the given width and signedness into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<Int> CreateInt(::flatbuffers::FlatBufferBuilder& _fbb,
+                                            int32_t bitWidth = 0,
+                                            bool is_signed = false)
+{
+  IntBuilder builder_(_fbb);
+  builder_.add_bitWidth(bitWidth);
+  builder_.add_is_signed(is_signed);
+  return builder_.Finish();
+}
+
+struct FloatingPoint FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef FloatingPointBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_PRECISION = 4 };
+  /// Reads the precision field; 0 (Precision_HALF) when the field is absent.
+  cudf::io::parquet::flatbuf::Precision precision() const
+  {
+    return static_cast<cudf::io::parquet::flatbuf::Precision>(GetField<int16_t>(VT_PRECISION, 0));
+  }
+  /// Runs the verifier over the table start, the precision field and the table end.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int16_t>(verifier, VT_PRECISION, 2) &&
+           verifier.EndTable();
+  }
+};
+
+/// Builder for FloatingPoint tables; call add_precision() before Finish().
+struct FloatingPointBuilder {
+  typedef FloatingPoint Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  /// Adds the precision field, stored as an int16_t with default 0.
+  void add_precision(cudf::io::parquet::flatbuf::Precision precision)
+  {
+    fbb_.AddElement<int16_t>(FloatingPoint::VT_PRECISION, static_cast<int16_t>(precision), 0);
+  }
+  explicit FloatingPointBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<FloatingPoint> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<FloatingPoint>(end);
+    return o;
+  }
+};
+
+/// Writes a FloatingPoint table with the given precision into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<FloatingPoint> CreateFloatingPoint(
+  ::flatbuffers::FlatBufferBuilder& _fbb,
+  cudf::io::parquet::flatbuf::Precision precision = cudf::io::parquet::flatbuf::Precision_HALF)
+{
+  FloatingPointBuilder builder_(_fbb);
+  builder_.add_precision(precision);
+  return builder_.Finish();
+}
+
+/// Unicode with UTF-8 encoding
+struct Utf8 FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef Utf8Builder Builder;
+  /// Runs the verifier over the table start and end; Utf8 declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for Utf8 tables: the constructor starts the table, Finish() ends it.
+struct Utf8Builder {
+  typedef Utf8 Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit Utf8Builder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<Utf8> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<Utf8>(end);
+    return o;
+  }
+};
+
+/// Writes a Utf8 table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<Utf8> CreateUtf8(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  Utf8Builder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Opaque binary data
+struct Binary FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef BinaryBuilder Builder;
+  /// Runs the verifier over the table start and end; Binary declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for Binary tables: the constructor starts the table, Finish() ends it.
+struct BinaryBuilder {
+  typedef Binary Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit BinaryBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<Binary> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<Binary>(end);
+    return o;
+  }
+};
+
+/// Writes a Binary table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<Binary> CreateBinary(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  BinaryBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Same as Utf8, but with 64-bit offsets, allowing to represent
+/// extremely large data values.
+struct LargeUtf8 FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef LargeUtf8Builder Builder;
+  /// Runs the verifier over the table start and end; LargeUtf8 declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for LargeUtf8 tables: the constructor starts the table, Finish() ends it.
+struct LargeUtf8Builder {
+  typedef LargeUtf8 Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit LargeUtf8Builder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<LargeUtf8> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<LargeUtf8>(end);
+    return o;
+  }
+};
+
+/// Writes a LargeUtf8 table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<LargeUtf8> CreateLargeUtf8(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  LargeUtf8Builder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Same as Binary, but with 64-bit offsets, allowing to represent
+/// extremely large data values.
+struct LargeBinary FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef LargeBinaryBuilder Builder;
+  /// Runs the verifier over the table start and end; LargeBinary declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for LargeBinary tables: the constructor starts the table, Finish() ends it.
+struct LargeBinaryBuilder {
+  typedef LargeBinary Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit LargeBinaryBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<LargeBinary> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<LargeBinary>(end);
+    return o;
+  }
+};
+
+/// Writes a LargeBinary table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<LargeBinary> CreateLargeBinary(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  LargeBinaryBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Logically the same as Utf8, but the internal representation uses a view
+/// struct that contains the string length and either the string's entire data
+/// inline (for small strings) or an inlined prefix, an index of another buffer,
+/// and an offset pointing to a slice in that buffer (for non-small strings).
+///
+/// Since it uses a variable number of data buffers, each Field with this type
+/// must have a corresponding entry in `variadicBufferCounts`.
+struct Utf8View FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef Utf8ViewBuilder Builder;
+  /// Runs the verifier over the table start and end; Utf8View declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for Utf8View tables: the constructor starts the table, Finish() ends it.
+struct Utf8ViewBuilder {
+  typedef Utf8View Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit Utf8ViewBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<Utf8View> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<Utf8View>(end);
+    return o;
+  }
+};
+
+/// Writes a Utf8View table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<Utf8View> CreateUtf8View(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  Utf8ViewBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Logically the same as Binary, but the internal representation uses a view
+/// struct that contains the string length and either the string's entire data
+/// inline (for small strings) or an inlined prefix, an index of another buffer,
+/// and an offset pointing to a slice in that buffer (for non-small strings).
+///
+/// Since it uses a variable number of data buffers, each Field with this type
+/// must have a corresponding entry in `variadicBufferCounts`.
+struct BinaryView FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef BinaryViewBuilder Builder;
+  /// Runs the verifier over the table start and end; BinaryView declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for BinaryView tables: the constructor starts the table, Finish() ends it.
+struct BinaryViewBuilder {
+  typedef BinaryView Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit BinaryViewBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<BinaryView> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<BinaryView>(end);
+    return o;
+  }
+};
+
+/// Writes a BinaryView table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<BinaryView> CreateBinaryView(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  BinaryViewBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct FixedSizeBinary FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef FixedSizeBinaryBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BYTEWIDTH = 4 };
+  /// Number of bytes per value
+  int32_t byteWidth() const { return GetField<int32_t>(VT_BYTEWIDTH, 0); }
+  /// Runs the verifier over the table start, the byteWidth field and the table end.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_BYTEWIDTH, 4) &&
+           verifier.EndTable();
+  }
+};
+
+/// Builder for FixedSizeBinary tables; call add_byteWidth() before Finish().
+struct FixedSizeBinaryBuilder {
+  typedef FixedSizeBinary Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  /// Adds the byteWidth field; 0 is passed to AddElement as the field's default.
+  void add_byteWidth(int32_t byteWidth)
+  {
+    fbb_.AddElement<int32_t>(FixedSizeBinary::VT_BYTEWIDTH, byteWidth, 0);
+  }
+  explicit FixedSizeBinaryBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<FixedSizeBinary> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<FixedSizeBinary>(end);
+    return o;
+  }
+};
+
+/// Writes a FixedSizeBinary table with the given `byteWidth` into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<FixedSizeBinary> CreateFixedSizeBinary(
+  ::flatbuffers::FlatBufferBuilder& _fbb, int32_t byteWidth = 0)
+{
+  FixedSizeBinaryBuilder builder_(_fbb);
+  builder_.add_byteWidth(byteWidth);
+  return builder_.Finish();
+}
+
+struct Bool FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef BoolBuilder Builder;
+  /// Runs the verifier over the table start and end; Bool declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for Bool tables: the constructor starts the table, Finish() ends it.
+struct BoolBuilder {
+  typedef Bool Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit BoolBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<Bool> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<Bool>(end);
+    return o;
+  }
+};
+
+/// Writes a Bool table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<Bool> CreateBool(::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  BoolBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Contains two child arrays, run_ends and values.
+/// The run_ends child array must be a 16/32/64-bit integer array
+/// which encodes the indices at which the run with the value in
+/// each corresponding index in the values child array ends.
+/// Like list/struct types, the value array can be of any type.
+struct RunEndEncoded FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef RunEndEncodedBuilder Builder;
+  /// Runs the verifier over the table start and end; RunEndEncoded declares no fields.
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+/// Builder for RunEndEncoded tables: the constructor starts the table, Finish() ends it.
+struct RunEndEncodedBuilder {
+  typedef RunEndEncoded Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  explicit RunEndEncodedBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset<RunEndEncoded> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = ::flatbuffers::Offset<RunEndEncoded>(end);
+    return o;
+  }
+};
+
+/// Writes a RunEndEncoded table into `_fbb` and returns its offset.
+inline ::flatbuffers::Offset<RunEndEncoded> CreateRunEndEncoded(
+  ::flatbuffers::FlatBufferBuilder& _fbb)
+{
+  RunEndEncodedBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+/// Exact decimal value represented as an integer value in two's
+/// complement.
Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +/// are used. The representation uses the endianness indicated +/// in the Schema. +struct Decimal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DecimalBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PRECISION = 4, + VT_SCALE = 6, + VT_BITWIDTH = 8 + }; + /// Total number of decimal digits + int32_t precision() const { return GetField(VT_PRECISION, 0); } + /// Number of digits after the decimal point "." + int32_t scale() const { return GetField(VT_SCALE, 0); } + /// Number of bits per value. The only accepted widths are 128 and 256. + /// We use bitWidth for consistency with Int::bitWidth. + int32_t bitWidth() const { return GetField(VT_BITWIDTH, 128); } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_PRECISION, 4) && + VerifyField(verifier, VT_SCALE, 4) && + VerifyField(verifier, VT_BITWIDTH, 4) && verifier.EndTable(); + } +}; + +struct DecimalBuilder { + typedef Decimal Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_precision(int32_t precision) + { + fbb_.AddElement(Decimal::VT_PRECISION, precision, 0); + } + void add_scale(int32_t scale) { fbb_.AddElement(Decimal::VT_SCALE, scale, 0); } + void add_bitWidth(int32_t bitWidth) + { + fbb_.AddElement(Decimal::VT_BITWIDTH, bitWidth, 128); + } + explicit DecimalBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDecimal(::flatbuffers::FlatBufferBuilder& _fbb, + int32_t precision = 0, + int32_t scale = 0, + int32_t bitWidth = 128) +{ + DecimalBuilder builder_(_fbb); + builder_.add_bitWidth(bitWidth); + builder_.add_scale(scale); + 
builder_.add_precision(precision); + return builder_.Finish(); +} + +/// Date is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units: +/// +/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +/// leap seconds), where the values are evenly divisible by 86400000 +/// * Days (32 bits) since the UNIX epoch +struct Date FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DateBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_UNIT = 4 }; + cudf::io::parquet::flatbuf::DateUnit unit() const + { + return static_cast(GetField(VT_UNIT, 1)); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_UNIT, 2) && + verifier.EndTable(); + } +}; + +struct DateBuilder { + typedef Date Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_unit(cudf::io::parquet::flatbuf::DateUnit unit) + { + fbb_.AddElement(Date::VT_UNIT, static_cast(unit), 1); + } + explicit DateBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDate( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::DateUnit unit = cudf::io::parquet::flatbuf::DateUnit_MILLISECOND) +{ + DateBuilder builder_(_fbb); + builder_.add_unit(unit); + return builder_.Finish(); +} + +/// Time is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since midnight, stored in either of four units: seconds, +/// milliseconds, microseconds or nanoseconds. 
+///
+/// The integer `bitWidth` depends on the `unit` and must be one of the following:
+/// * SECOND and MILLISECOND: 32 bits
+/// * MICROSECOND and NANOSECOND: 64 bits
+///
+/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
+/// (exclusive), adjusted for the time unit (for example, up to 86400000
+/// exclusive for the MILLISECOND unit).
+/// This definition doesn't allow for leap seconds. Time values from
+/// measurements with leap seconds will need to be corrected when ingesting
+/// into Arrow (for example by replacing the value 86400 with 86399).
+struct Time FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
+  typedef TimeBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_UNIT = 4, VT_BITWIDTH = 6 };
+  /// Unit of the time value; stored as a 16-bit field with raw default 1.
+  cudf::io::parquet::flatbuf::TimeUnit unit() const
+  {
+    return static_cast<cudf::io::parquet::flatbuf::TimeUnit>(GetField<int16_t>(VT_UNIT, 1));
+  }
+  /// Width of the stored integer in bits (default 32).
+  int32_t bitWidth() const { return GetField<int32_t>(VT_BITWIDTH, 32); }
+  bool Verify(::flatbuffers::Verifier& verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int16_t>(verifier, VT_UNIT, 2) &&
+           VerifyField<int32_t>(verifier, VT_BITWIDTH, 4) && verifier.EndTable();
+  }
+};
+
+struct TimeBuilder {
+  typedef Time Table;
+  ::flatbuffers::FlatBufferBuilder& fbb_;
+  ::flatbuffers::uoffset_t start_;
+  void add_unit(cudf::io::parquet::flatbuf::TimeUnit unit)
+  {
+    fbb_.AddElement(Time::VT_UNIT, static_cast(unit), 1);
+  }
+  void add_bitWidth(int32_t bitWidth) { fbb_.AddElement(Time::VT_BITWIDTH, bitWidth, 32); }
+  explicit TimeBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  ::flatbuffers::Offset