From 053f7da21c47f7caf739c999bbb424356ee18cd7 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb Date: Tue, 30 Apr 2024 02:53:43 +0000 Subject: [PATCH 01/53] Read duration type in cudf parquet via arrow:schema --- .../cudf/detail/utilities/base64_utils.hpp | 159 + cpp/include/cudf/io/ipc/Message.fbs | 155 + cpp/include/cudf/io/ipc/Schema.fbs | 570 ++++ .../cudf/io/ipc/detail/Message_generated.h | 651 ++++ .../cudf/io/ipc/detail/Schema_generated.h | 2769 +++++++++++++++++ .../cudf/io/ipc/flatbuffers/allocator.h | 73 + cpp/include/cudf/io/ipc/flatbuffers/array.h | 267 ++ cpp/include/cudf/io/ipc/flatbuffers/base.h | 498 +++ cpp/include/cudf/io/ipc/flatbuffers/buffer.h | 222 ++ .../cudf/io/ipc/flatbuffers/buffer_ref.h | 58 + .../io/ipc/flatbuffers/default_allocator.h | 70 + .../cudf/io/ipc/flatbuffers/detached_buffer.h | 128 + .../io/ipc/flatbuffers/flatbuffer_builder.h | 1574 ++++++++++ .../cudf/io/ipc/flatbuffers/flatbuffers.h | 289 ++ .../cudf/io/ipc/flatbuffers/stl_emulation.h | 568 ++++ cpp/include/cudf/io/ipc/flatbuffers/string.h | 70 + cpp/include/cudf/io/ipc/flatbuffers/struct.h | 60 + cpp/include/cudf/io/ipc/flatbuffers/table.h | 202 ++ cpp/include/cudf/io/ipc/flatbuffers/vector.h | 416 +++ .../cudf/io/ipc/flatbuffers/vector_downward.h | 316 ++ .../cudf/io/ipc/flatbuffers/verifier.h | 389 +++ cpp/src/io/functions.cpp | 3 + cpp/src/io/parquet/reader_impl.cpp | 42 +- cpp/src/io/parquet/reader_impl_helpers.cpp | 628 ++-- cpp/src/io/parquet/reader_impl_helpers.hpp | 22 +- 25 files changed, 9993 insertions(+), 206 deletions(-) create mode 100644 cpp/include/cudf/detail/utilities/base64_utils.hpp create mode 100644 cpp/include/cudf/io/ipc/Message.fbs create mode 100644 cpp/include/cudf/io/ipc/Schema.fbs create mode 100644 cpp/include/cudf/io/ipc/detail/Message_generated.h create mode 100644 cpp/include/cudf/io/ipc/detail/Schema_generated.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/allocator.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/array.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/base.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/buffer.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/buffer_ref.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/default_allocator.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/detached_buffer.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/flatbuffer_builder.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/flatbuffers.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/stl_emulation.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/string.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/struct.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/table.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/vector.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/vector_downward.h create mode 100644 cpp/include/cudf/io/ipc/flatbuffers/verifier.h diff --git a/cpp/include/cudf/detail/utilities/base64_utils.hpp b/cpp/include/cudf/detail/utilities/base64_utils.hpp new file mode 100644 index 00000000000..fd8e85ec382 --- /dev/null +++ b/cpp/include/cudf/detail/utilities/base64_utils.hpp @@ -0,0 +1,159 @@ +/* + base64_utils.cpp and base64_utils.hpp + + base64 encoding and decoding with C++. + + Version: 1.01.00 + + Copyright (C) 2004-2017 René Nyffenegger + + This source code is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. 
+ + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this source code must not be misrepresented; you must not + claim that you wrote the original source code. If you use this source code + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original source code. + + 3. This notice may not be removed or altered from any source distribution. + + René Nyffenegger rene.nyffenegger@adp-gmbh.ch + +*/ + +/** + * @file base64_utils.hpp + * @brief base64 string encoding/decoding utilities and implementation + */ + +#pragma once + +// altered: including required std headers +#include +#include +#include +#include + +// altered: merged base64.h and base64.cpp into one file. +// altered: applying clang-format for libcudf on this file. + +// altered: use cudf namespaces +namespace cudf::detail { + +static const std::string base64_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +static inline auto is_base64(unsigned char c) { return (isalnum(c) or (c == '+') or (c == '/')); } + +// merging the encoder wrapper into the single function +std::string base64_encode(std::string_view string_to_encode) +{ + // get bytes to encode and length + auto bytes_to_encode = reinterpret_cast(string_to_encode.data()); + auto input_length = string_to_encode.size(); + + std::string encoded; + std::array char_array_4; + std::array char_array_3; + int i = 0; + int j = 0; + + // altered: added braces to one liner loops in the rest of this function + while (input_length--) { + char_array_3[i++] = *(bytes_to_encode++); + if (i == 3) { + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for (i = 0; (i < 4); i++) { + encoded += base64_chars[char_array_4[i]]; + } + i = 0; + } + } + + if (i) { + for (j = i; j < 3; j++) { + char_array_3[j] = '\0'; + } + + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + + for (j = 0; (j < i + 1); j++) { + encoded += base64_chars[char_array_4[j]]; + } + while ((i++ < 3)) { + encoded += '='; + } + } + + return encoded; +} + +// base64 decode lambda function +std::string base64_decode(std::string_view encoded_string) +{ + std::array char_array_4; + std::array char_array_3; + std::string decoded; + size_t input_len = encoded_string.size(); + + int i = 0; + int j = 0; + int in_ = 0; + + // altered: added braces to one liner loops in the rest of this function + while (input_len-- and (encoded_string[in_] != '=') and is_base64(encoded_string[in_])) { + char_array_4[i++] = encoded_string[in_]; + in_++; + if (i == 4) { + for (i = 0; i < 4; i++) { + char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff; + } + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 
0; (i < 3); i++) { + decoded += char_array_3[i]; + } + i = 0; + } + } + + // altered: modify to i!=0 for better readability + if (i != 0) { + for (j = 0; j < i; j++) { + char_array_4[j] = base64_chars.find(char_array_4[j]) & 0xff; + } + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + // altered: TODO: arrow source code doesn't have the below line. + // altered: This is inconsequential as it is never appended to + // altered: `decoded` as max(i) = 3 and 0 <= j < 2. + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (j = 0; j < i - 1; j++) { + decoded += char_array_3[j]; + } + } + + return decoded; +} + +} // namespace cudf::detail diff --git a/cpp/include/cudf/io/ipc/Message.fbs b/cpp/include/cudf/io/ipc/Message.fbs new file mode 100644 index 00000000000..8a65c2e3cf9 --- /dev/null +++ b/cpp/include/cudf/io/ipc/Message.fbs @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "Schema.fbs"; + +namespace cudf.io.parquet.flatbuf; + +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) + +/// Metadata about a field at some level of a nested type tree (but not +/// its children). +/// +/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` +/// would have {length: 5, null_count: 2} for its List node, and {length: 6, +/// null_count: 0} for its Int16 node, as separate FieldNode structs +struct FieldNode { + /// The number of value slots in the Arrow array at this level of a nested + /// tree + length: long; + + /// The number of observed nulls. Fields with null_count == 0 may choose not + /// to write their physical validity bitmap out as a materialized buffer, + /// instead setting the length of the bitmap buffer to 0. + null_count: long; +} + +enum CompressionType:byte { + // LZ4 frame format, for portability, as provided by lz4frame.h or wrappers + // thereof. Not to be confused with "raw" (also called "block") format + // provided by lz4.h + LZ4_FRAME, + + // Zstandard + ZSTD +} + +/// Provided for forward compatibility in case we need to support different +/// strategies for compressing the IPC message body (like whole-body +/// compression rather than buffer-level) in the future +enum BodyCompressionMethod:byte { + /// Each constituent buffer is first compressed with the indicated + /// compressor, and then written with the uncompressed length in the first 8 + /// bytes as a 64-bit little-endian signed integer followed by the compressed + /// buffer bytes (and then padding as required by the protocol). 
The + /// uncompressed length may be set to -1 to indicate that the data that + /// follows is not compressed, which can be useful for cases where + /// compression does not yield appreciable savings. + BUFFER +} + +/// Optional compression for the memory buffers constituting IPC message +/// bodies. Intended for use with RecordBatch but could be used for other +/// message types +table BodyCompression { + /// Compressor library. + /// For LZ4_FRAME, each compressed buffer must consist of a single frame. + codec: CompressionType = LZ4_FRAME; + + /// Indicates the way the record batch body was compressed + method: BodyCompressionMethod = BUFFER; +} + +/// A data header describing the shared memory layout of a "record" or "row" +/// batch. Some systems call this a "row batch" internally and others a "record +/// batch". +table RecordBatch { + /// number of records / rows. The arrays in the batch should all have this + /// length + length: long; + + /// Nodes correspond to the pre-ordered flattened logical schema + nodes: [FieldNode]; + + /// Buffers correspond to the pre-ordered flattened buffer tree + /// + /// The number of buffers appended to this list depends on the schema. For + /// example, most primitive arrays will have 2 buffers, 1 for the validity + /// bitmap and 1 for the values. For struct arrays, there will only be a + /// single buffer for the validity (nulls) bitmap + buffers: [Buffer]; + + /// Optional compression of the message body + compression: BodyCompression; + + /// Some types such as Utf8View are represented using a variable number of buffers. + /// For each such Field in the pre-ordered flattened logical schema, there will be + /// an entry in variadicBufferCounts to indicate the number of number of variadic + /// buffers which belong to that Field in the current RecordBatch. + /// + /// For example, the schema + /// col1: Struct + /// col2: Utf8View + /// contains two Fields with variadic buffers so variadicBufferCounts will have + /// two entries, the first counting the variadic buffers of `col1.beta` and the + /// second counting `col2`'s. + /// + /// This field may be omitted if and only if the schema contains no Fields with + /// a variable number of buffers, such as BinaryView and Utf8View. + variadicBufferCounts: [long]; +} + +/// For sending dictionary encoding information. Any Field can be +/// dictionary-encoded, but in this case none of its children may be +/// dictionary-encoded. +/// There is one vector / column per dictionary, but that vector / column +/// may be spread across multiple dictionary batches by using the isDelta +/// flag + +table DictionaryBatch { + id: long; + data: RecordBatch; + + /// If isDelta is true the values in the dictionary are to be appended to a + /// dictionary with the indicated id. If isDelta is false this dictionary + /// should replace the existing dictionary. + isDelta: bool = false; +} + +/// ---------------------------------------------------------------------- +/// The root Message type + +/// This union enables us to easily send different message types without +/// redundant storage, and in the future we can easily add new message types. +/// +/// Arrow implementations do not need to implement all of the message types, +/// which may include experimental metadata types. 
For maximum compatibility, +/// it is best to send data using RecordBatch +union MessageHeader { + Schema +} + +table Message { + version: cudf.io.parquet.flatbuf.MetadataVersion; + header: MessageHeader; + bodyLength: long; + custom_metadata: [ KeyValue ]; +} + +root_type Message; diff --git a/cpp/include/cudf/io/ipc/Schema.fbs b/cpp/include/cudf/io/ipc/Schema.fbs new file mode 100644 index 00000000000..8479785b261 --- /dev/null +++ b/cpp/include/cudf/io/ipc/Schema.fbs @@ -0,0 +1,570 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Logical types, vector layouts, and schemas + +/// Format Version History. +/// Version 1.0 - Forward and backwards compatibility guaranteed. +/// Version 1.1 - Add Decimal256. +/// Version 1.2 - Add Interval MONTH_DAY_NANO. +/// Version 1.3 - Add Run-End Encoded. +/// Version 1.4 - Add BinaryView, Utf8View, variadicBufferCounts, ListView, and +/// LargeListView. + +namespace cudf.io.parquet.flatbuf; + +enum MetadataVersion:short { + /// 0.1.0 (October 2016). + V1, + + /// 0.2.0 (February 2017). Non-backwards compatible with V1. + V2, + + /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. + V3, + + /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. + V4, + + /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 + /// metadata and IPC messages). Implementations are recommended to provide a + /// V4 compatibility mode with V5 format changes disabled. + /// + /// Incompatible changes between V4 and V5: + /// - Union buffer layout has changed. In V5, Unions don't have a validity + /// bitmap buffer. + V5, +} + +/// Represents Arrow Features that might not have full support +/// within implementations. This is intended to be used in +/// two scenarios: +/// 1. A mechanism for readers of Arrow Streams +/// and files to understand that the stream or file makes +/// use of a feature that isn't supported or unknown to +/// the implementation (and therefore can meet the Arrow +/// forward compatibility guarantees). +/// 2. A means of negotiating between a client and server +/// what features a stream is allowed to use. The enums +/// values here are intented to represent higher level +/// features, additional details maybe negotiated +/// with key-value pairs specific to the protocol. +/// +/// Enums added to this list should be assigned power-of-two values +/// to facilitate exchanging and comparing bitmaps for supported +/// features. +enum Feature : long { + /// Needed to make flatbuffers happy. + UNUSED = 0, + /// The stream makes use of multiple full dictionaries with the + /// same ID and assumes clients implement dictionary replacement + /// correctly. 
+ DICTIONARY_REPLACEMENT = 1, + /// The stream makes use of compressed bodies as described + /// in Message.fbs. + COMPRESSED_BODY = 2 +} + +/// These are stored in the flatbuffer in the Type union below + +table Null { +} + +/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +/// (according to the physical memory layout). We used Struct_ here as +/// Struct is a reserved word in Flatbuffers +table Struct_ { +} + +table List { +} + +/// Same as List, but with 64-bit offsets, allowing to represent +/// extremely large data values. +table LargeList { +} + +/// Represents the same logical types that List can, but contains offsets and +/// sizes allowing for writes in any order and sharing of child values among +/// list values. +table ListView { +} + +/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent +/// extremely large data values. +table LargeListView { +} + +table FixedSizeList { + /// Number of list items per value + listSize: int; +} + +/// A Map is a logical nested type that is represented as +/// +/// List> +/// +/// In this layout, the keys and values are each respectively contiguous. We do +/// not constrain the key and value types, so the application is responsible +/// for ensuring that the keys are hashable and unique. Whether the keys are sorted +/// may be set in the metadata for this field. +/// +/// In a field with Map type, the field has a child Struct field, which then +/// has two children: key type and the second the value type. The names of the +/// child fields may be respectively "entries", "key", and "value", but this is +/// not enforced. +/// +/// Map +/// ```text +/// - child[0] entries: Struct +/// - child[0] key: K +/// - child[1] value: V +/// ``` +/// Neither the "entries" field nor the "key" field may be nullable. +/// +/// The metadata is structured so that Arrow systems without special handling +/// for Map can make Map an alias for List. The "layout" attribute for the Map +/// field must have the same contents as a List. +table Map { + /// Set to true if the keys within each value are sorted + keysSorted: bool; +} + +enum UnionMode:short { Sparse, Dense } + +/// A union is a complex type with children in Field +/// By default ids in the type vector refer to the offsets in the children +/// optionally typeIds provides an indirection between the child offset and the type id +/// for each child `typeIds[offset]` is the id used in the type vector +table Union { + mode: UnionMode; + typeIds: [ int ]; // optional, describes typeid of each child. +} + +table Int { + bitWidth: int; // restricted to 8, 16, 32, and 64 in v1 + is_signed: bool; +} + +enum Precision:short {HALF, SINGLE, DOUBLE} + +table FloatingPoint { + precision: Precision; +} + +/// Unicode with UTF-8 encoding +table Utf8 { +} + +/// Opaque binary data +table Binary { +} + +/// Same as Utf8, but with 64-bit offsets, allowing to represent +/// extremely large data values. +table LargeUtf8 { +} + +/// Same as Binary, but with 64-bit offsets, allowing to represent +/// extremely large data values. +table LargeBinary { +} + +/// Logically the same as Utf8, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). 
+/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +table Utf8View { +} + +/// Logically the same as Binary, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +table BinaryView { +} + + +table FixedSizeBinary { + /// Number of bytes per value + byteWidth: int; +} + +table Bool { +} + +/// Contains two child arrays, run_ends and values. +/// The run_ends child array must be a 16/32/64-bit integer array +/// which encodes the indices at which the run with the value in +/// each corresponding index in the values child array ends. +/// Like list/struct types, the value array can be of any type. +table RunEndEncoded { +} + +/// Exact decimal value represented as an integer value in two's +/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +/// are used. The representation uses the endianness indicated +/// in the Schema. +table Decimal { + /// Total number of decimal digits + precision: int; + + /// Number of digits after the decimal point "." + scale: int; + + /// Number of bits per value. The only accepted widths are 128 and 256. + /// We use bitWidth for consistency with Int::bitWidth. + bitWidth: int = 128; +} + +enum DateUnit: short { + DAY, + MILLISECOND +} + +/// Date is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units: +/// +/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +/// leap seconds), where the values are evenly divisible by 86400000 +/// * Days (32 bits) since the UNIX epoch +table Date { + unit: DateUnit = MILLISECOND; +} + +enum TimeUnit: short { SECOND, MILLISECOND, MICROSECOND, NANOSECOND } + +/// Time is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since midnight, stored in either of four units: seconds, +/// milliseconds, microseconds or nanoseconds. +/// +/// The integer `bitWidth` depends on the `unit` and must be one of the following: +/// * SECOND and MILLISECOND: 32 bits +/// * MICROSECOND and NANOSECOND: 64 bits +/// +/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds +/// (exclusive), adjusted for the time unit (for example, up to 86400000 +/// exclusive for the MILLISECOND unit). +/// This definition doesn't allow for leap seconds. Time values from +/// measurements with leap seconds will need to be corrected when ingesting +/// into Arrow (for example by replacing the value 86400 with 86399). +table Time { + unit: TimeUnit = MILLISECOND; + bitWidth: int = 32; +} + +/// Timestamp is a 64-bit signed integer representing an elapsed time since a +/// fixed epoch, stored in either of four units: seconds, milliseconds, +/// microseconds or nanoseconds, and is optionally annotated with a timezone. +/// +/// Timestamp values do not include any leap seconds (in other words, all +/// days are considered 86400 seconds long). 
+/// +/// Timestamps with a non-empty timezone +/// ------------------------------------ +/// +/// If a Timestamp column has a non-empty timezone value, its epoch is +/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone +/// (the Unix epoch), regardless of the Timestamp's own timezone. +/// +/// Therefore, timestamp values with a non-empty timezone correspond to +/// physical points in time together with some additional information about +/// how the data was obtained and/or how to display it (the timezone). +/// +/// For example, the timestamp value 0 with the timezone string "Europe/Paris" +/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the +/// application may prefer to display it as "January 1st 1970, 01h00" in +/// the Europe/Paris timezone (which is the same physical point in time). +/// +/// One consequence is that timestamp values with a non-empty timezone +/// can be compared and ordered directly, since they all share the same +/// well-known point of reference (the Unix epoch). +/// +/// Timestamps with an unset / empty timezone +/// ----------------------------------------- +/// +/// If a Timestamp column has no timezone value, its epoch is +/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. +/// +/// Therefore, timestamp values without a timezone cannot be meaningfully +/// interpreted as physical points in time, but only as calendar / clock +/// indications ("wall clock time") in an unspecified timezone. +/// +/// For example, the timestamp value 0 with an empty timezone string +/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there +/// is not enough information to interpret it as a well-defined physical +/// point in time. +/// +/// One consequence is that timestamp values without a timezone cannot +/// be reliably compared or ordered, since they may have different points of +/// reference. In particular, it is *not* possible to interpret an unset +/// or empty timezone as the same as "UTC". +/// +/// Conversion between timezones +/// ---------------------------- +/// +/// If a Timestamp column has a non-empty timezone, changing the timezone +/// to a different non-empty value is a metadata-only operation: +/// the timestamp values need not change as their point of reference remains +/// the same (the Unix epoch). +/// +/// However, if a Timestamp column has no timezone value, changing it to a +/// non-empty value requires to think about the desired semantics. +/// One possibility is to assume that the original timestamp values are +/// relative to the epoch of the timezone being set; timestamp values should +/// then adjusted to the Unix epoch (for example, changing the timezone from +/// empty to "Europe/Paris" would require converting the timestamp values +/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is +/// nevertheless correct). +/// +/// Guidelines for encoding data from external libraries +/// ---------------------------------------------------- +/// +/// Date & time libraries often have multiple different data types for temporal +/// data. In order to ease interoperability between different implementations the +/// Arrow project has some recommendations for encoding these types into a Timestamp +/// column. +/// +/// An "instant" represents a physical point in time that has no relevant timezone +/// (for example, astronomical data). 
To encode an instant, use a Timestamp with +/// the timezone string set to "UTC", and make sure the Timestamp values +/// are relative to the UTC epoch (January 1st 1970, midnight). +/// +/// A "zoned date-time" represents a physical point in time annotated with an +/// informative timezone (for example, the timezone in which the data was +/// recorded). To encode a zoned date-time, use a Timestamp with the timezone +/// string set to the name of the timezone, and make sure the Timestamp values +/// are relative to the UTC epoch (January 1st 1970, midnight). +/// +/// (There is some ambiguity between an instant and a zoned date-time with the +/// UTC timezone. Both of these are stored the same in Arrow. Typically, +/// this distinction does not matter. If it does, then an application should +/// use custom metadata or an extension type to distinguish between the two cases.) +/// +/// An "offset date-time" represents a physical point in time combined with an +/// explicit offset from UTC. To encode an offset date-time, use a Timestamp +/// with the timezone string set to the numeric timezone offset string +/// (e.g. "+03:00"), and make sure the Timestamp values are relative to +/// the UTC epoch (January 1st 1970, midnight). +/// +/// A "naive date-time" (also called "local date-time" in some libraries) +/// represents a wall clock time combined with a calendar date, but with +/// no indication of how to map this information to a physical point in time. +/// Naive date-times must be handled with care because of this missing +/// information, and also because daylight saving time (DST) may make +/// some values ambiguous or nonexistent. A naive date-time may be +/// stored as a struct with Date and Time fields. However, it may also be +/// encoded into a Timestamp column with an empty timezone. The timestamp +/// values should be computed "as if" the timezone of the date-time values +/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would +/// be encoded as timestamp value 0. +table Timestamp { + unit: TimeUnit; + + /// The timezone is an optional string indicating the name of a timezone, + /// one of: + /// + /// * As used in the Olson timezone database (the "tz database" or + /// "tzdata"), such as "America/New_York". + /// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", + /// such as "+07:30". + /// + /// Whether a timezone string is present indicates different semantics about + /// the data (see above). + timezone: string; +} + +enum IntervalUnit: short { YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO} +// A "calendar" interval which models types that don't necessarily +// have a precise duration without the context of a base timestamp (e.g. +// days can differ in length during day light savings time transitions). +// All integers in the types below are stored in the endianness indicated +// by the schema. +// +// YEAR_MONTH - Indicates the number of elapsed whole months, stored as +// 4-byte signed integers. +// DAY_TIME - Indicates the number of elapsed days and milliseconds (no leap seconds), +// stored as 2 contiguous 32-bit signed integers (8-bytes in total). Support +// of this IntervalUnit is not required for full arrow compatibility. +// MONTH_DAY_NANO - A triple of the number of elapsed months, days, and nanoseconds. +// The values are stored contiguously in 16-byte blocks. Months and days are +// encoded as 32-bit signed integers and nanoseconds is encoded as a 64-bit +// signed integer. Nanoseconds does not allow for leap seconds. 
Each field is +// independent (e.g. there is no constraint that nanoseconds have the same +// sign as days or that the quantity of nanoseconds represents less than a +// day's worth of time). +table Interval { + unit: IntervalUnit; +} + +// An absolute length of time unrelated to any calendar artifacts. +// +// For the purposes of Arrow Implementations, adding this value to a Timestamp +// ("t1") naively (i.e. simply summing the two number) is acceptable even +// though in some cases the resulting Timestamp (t2) would not account for +// leap-seconds during the elapsed time between "t1" and "t2". Similarly, +// representing the difference between two Unix timestamp is acceptable, but +// would yield a value that is possibly a few seconds off from the true elapsed +// time. +// +// The resolution defaults to millisecond, but can be any of the other +// supported TimeUnit values as with Timestamp and Time types. This type is +// always represented as an 8-byte integer. +table Duration { + unit: TimeUnit = MILLISECOND; +} + +/// ---------------------------------------------------------------------- +/// Top-level Type value, enabling extensible type-specific metadata. We can +/// add new logical types to Type without breaking backwards compatibility + +union Type { + Null, + Int, + FloatingPoint, + Binary, + Utf8, + Bool, + Decimal, + Date, + Time, + Timestamp, + Interval, + List, + Struct_, + Union, + FixedSizeBinary, + FixedSizeList, + Map, + Duration, + LargeBinary, + LargeUtf8, + LargeList, + RunEndEncoded, + BinaryView, + Utf8View, + ListView, + LargeListView, +} + +/// ---------------------------------------------------------------------- +/// user defined key value pairs to add custom metadata to arrow +/// key namespacing is the responsibility of the user + +table KeyValue { + key: string; + value: string; +} + +/// ---------------------------------------------------------------------- +/// Dictionary encoding metadata +/// Maintained for forwards compatibility, in the future +/// Dictionaries might be explicit maps between integers and values +/// allowing for non-contiguous index values +enum DictionaryKind : short { DenseArray } +table DictionaryEncoding { + /// The known dictionary id in the application where this data is used. In + /// the file or streaming formats, the dictionary ids are found in the + /// DictionaryBatch messages + id: long; + + /// The dictionary indices are constrained to be non-negative integers. If + /// this field is null, the indices must be signed int32. To maximize + /// cross-language compatibility and performance, implementations are + /// recommended to prefer signed integer types over unsigned integer types + /// and to avoid uint64 indices unless they are required by an application. + indexType: Int; + + /// By default, dictionaries are not ordered, or the order does not have + /// semantic meaning. In some statistical, applications, dictionary-encoding + /// is used to represent ordered categorical data, and we provide a way to + /// preserve that metadata here + isOrdered: bool; + + dictionaryKind: DictionaryKind; +} + +/// ---------------------------------------------------------------------- +/// A field represents a named column in a record / row batch or child of a +/// nested type. + +table Field { + /// Name is not required, in i.e. a List + name: string; + + /// Whether or not this field can contain nulls. Should be true in general. + nullable: bool; + + /// This is the type of the decoded value if the field is dictionary encoded. 
+ type: Type; + + /// Present only if the field is dictionary encoded. + dictionary: DictionaryEncoding; + + /// children apply only to nested data types like Struct, List and Union. For + /// primitive types children will have length 0. + children: [ Field ]; + + /// User-defined metadata + custom_metadata: [ KeyValue ]; +} + +/// ---------------------------------------------------------------------- +/// Endianness of the platform producing the data + +enum Endianness:short { Little, Big } + +/// ---------------------------------------------------------------------- +/// A Buffer represents a single contiguous memory segment +struct Buffer { + /// The relative offset into the shared memory page where the bytes for this + /// buffer starts + offset: long; + + /// The absolute length (in bytes) of the memory buffer. The memory is found + /// from offset (inclusive) to offset + length (non-inclusive). When building + /// messages using the encapsulated IPC message, padding bytes may be written + /// after a buffer, but such padding bytes do not need to be accounted for in + /// the size here. + length: long; +} + +/// ---------------------------------------------------------------------- +/// A Schema describes the columns in a row batch + +table Schema { + + /// endianness of the buffer + /// it is Little Endian by default + /// if endianness doesn't match the underlying system then the vectors need to be converted + endianness: Endianness=Little; + + fields: [Field]; + // User-defined metadata + custom_metadata: [ KeyValue ]; + + /// Features used in the stream/file. + features : [ Feature ]; +} + +root_type Schema; diff --git a/cpp/include/cudf/io/ipc/detail/Message_generated.h b/cpp/include/cudf/io/ipc/detail/Message_generated.h new file mode 100644 index 00000000000..1cede9a963d --- /dev/null +++ b/cpp/include/cudf/io/ipc/detail/Message_generated.h @@ -0,0 +1,651 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_MESSAGE_CUDF_IO_PARQUET_FLATBUF_H_ +#define FLATBUFFERS_GENERATED_MESSAGE_CUDF_IO_PARQUET_FLATBUF_H_ + +#include + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. 
+static_assert(FLATBUFFERS_VERSION_MAJOR == 24 && FLATBUFFERS_VERSION_MINOR == 3 && + FLATBUFFERS_VERSION_REVISION == 25, + "Non-compatible flatbuffers version included"); + +#include + +namespace cudf { +namespace io { +namespace parquet { +namespace flatbuf { + +struct FieldNode; + +struct BodyCompression; +struct BodyCompressionBuilder; + +struct RecordBatch; +struct RecordBatchBuilder; + +struct DictionaryBatch; +struct DictionaryBatchBuilder; + +struct Message; +struct MessageBuilder; + +enum CompressionType : int8_t { + CompressionType_LZ4_FRAME = 0, + CompressionType_ZSTD = 1, + CompressionType_MIN = CompressionType_LZ4_FRAME, + CompressionType_MAX = CompressionType_ZSTD +}; + +inline const CompressionType (&EnumValuesCompressionType())[2] +{ + static const CompressionType values[] = {CompressionType_LZ4_FRAME, CompressionType_ZSTD}; + return values; +} + +inline const char* const* EnumNamesCompressionType() +{ + static const char* const names[3] = {"LZ4_FRAME", "ZSTD", nullptr}; + return names; +} + +inline const char* EnumNameCompressionType(CompressionType e) +{ + if (::flatbuffers::IsOutRange(e, CompressionType_LZ4_FRAME, CompressionType_ZSTD)) return ""; + const size_t index = static_cast(e); + return EnumNamesCompressionType()[index]; +} + +/// Provided for forward compatibility in case we need to support different +/// strategies for compressing the IPC message body (like whole-body +/// compression rather than buffer-level) in the future +enum BodyCompressionMethod : int8_t { + /// Each constituent buffer is first compressed with the indicated + /// compressor, and then written with the uncompressed length in the first 8 + /// bytes as a 64-bit little-endian signed integer followed by the compressed + /// buffer bytes (and then padding as required by the protocol). The + /// uncompressed length may be set to -1 to indicate that the data that + /// follows is not compressed, which can be useful for cases where + /// compression does not yield appreciable savings. + BodyCompressionMethod_BUFFER = 0, + BodyCompressionMethod_MIN = BodyCompressionMethod_BUFFER, + BodyCompressionMethod_MAX = BodyCompressionMethod_BUFFER +}; + +inline const BodyCompressionMethod (&EnumValuesBodyCompressionMethod())[1] +{ + static const BodyCompressionMethod values[] = {BodyCompressionMethod_BUFFER}; + return values; +} + +inline const char* const* EnumNamesBodyCompressionMethod() +{ + static const char* const names[2] = {"BUFFER", nullptr}; + return names; +} + +inline const char* EnumNameBodyCompressionMethod(BodyCompressionMethod e) +{ + if (::flatbuffers::IsOutRange(e, BodyCompressionMethod_BUFFER, BodyCompressionMethod_BUFFER)) + return ""; + const size_t index = static_cast(e); + return EnumNamesBodyCompressionMethod()[index]; +} + +/// ---------------------------------------------------------------------- +/// The root Message type +/// This union enables us to easily send different message types without +/// redundant storage, and in the future we can easily add new message types. +/// +/// Arrow implementations do not need to implement all of the message types, +/// which may include experimental metadata types. 
For maximum compatibility, +/// it is best to send data using RecordBatch +enum MessageHeader : uint8_t { + MessageHeader_NONE = 0, + MessageHeader_Schema = 1, + MessageHeader_MIN = MessageHeader_NONE, + MessageHeader_MAX = MessageHeader_Schema +}; + +inline const MessageHeader (&EnumValuesMessageHeader())[2] +{ + static const MessageHeader values[] = {MessageHeader_NONE, MessageHeader_Schema}; + return values; +} + +inline const char* const* EnumNamesMessageHeader() +{ + static const char* const names[3] = {"NONE", "Schema", nullptr}; + return names; +} + +inline const char* EnumNameMessageHeader(MessageHeader e) +{ + if (::flatbuffers::IsOutRange(e, MessageHeader_NONE, MessageHeader_Schema)) return ""; + const size_t index = static_cast(e); + return EnumNamesMessageHeader()[index]; +} + +template +struct MessageHeaderTraits { + static const MessageHeader enum_value = MessageHeader_NONE; +}; + +template <> +struct MessageHeaderTraits { + static const MessageHeader enum_value = MessageHeader_Schema; +}; + +bool VerifyMessageHeader(::flatbuffers::Verifier& verifier, const void* obj, MessageHeader type); +bool VerifyMessageHeaderVector(::flatbuffers::Verifier& verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset>* values, + const ::flatbuffers::Vector* types); + +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) +/// Metadata about a field at some level of a nested type tree (but not +/// its children). +/// +/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` +/// would have {length: 5, null_count: 2} for its List node, and {length: 6, +/// null_count: 0} for its Int16 node, as separate FieldNode structs +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) FieldNode FLATBUFFERS_FINAL_CLASS +{ + private: + int64_t length_; + int64_t null_count_; + + public: + FieldNode() : length_(0), null_count_(0) {} + FieldNode(int64_t _length, int64_t _null_count) + : length_(::flatbuffers::EndianScalar(_length)), + null_count_(::flatbuffers::EndianScalar(_null_count)) + { + } + /// The number of value slots in the Arrow array at this level of a nested + /// tree + int64_t length() const { return ::flatbuffers::EndianScalar(length_); } + /// The number of observed nulls. Fields with null_count == 0 may choose not + /// to write their physical validity bitmap out as a materialized buffer, + /// instead setting the length of the bitmap buffer to 0. + int64_t null_count() const { return ::flatbuffers::EndianScalar(null_count_); } +}; +FLATBUFFERS_STRUCT_END(FieldNode, 16); + +/// Optional compression for the memory buffers constituting IPC message +/// bodies. Intended for use with RecordBatch but could be used for other +/// message types +struct BodyCompression FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BodyCompressionBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_CODEC = 4, VT_METHOD = 6 }; + /// Compressor library. + /// For LZ4_FRAME, each compressed buffer must consist of a single frame. 
+ cudf::io::parquet::flatbuf::CompressionType codec() const + { + return static_cast(GetField(VT_CODEC, 0)); + } + /// Indicates the way the record batch body was compressed + cudf::io::parquet::flatbuf::BodyCompressionMethod method() const + { + return static_cast( + GetField(VT_METHOD, 0)); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_CODEC, 1) && + VerifyField(verifier, VT_METHOD, 1) && verifier.EndTable(); + } +}; + +struct BodyCompressionBuilder { + typedef BodyCompression Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_codec(cudf::io::parquet::flatbuf::CompressionType codec) + { + fbb_.AddElement(BodyCompression::VT_CODEC, static_cast(codec), 0); + } + void add_method(cudf::io::parquet::flatbuf::BodyCompressionMethod method) + { + fbb_.AddElement(BodyCompression::VT_METHOD, static_cast(method), 0); + } + explicit BodyCompressionBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBodyCompression( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::CompressionType codec = + cudf::io::parquet::flatbuf::CompressionType_LZ4_FRAME, + cudf::io::parquet::flatbuf::BodyCompressionMethod method = + cudf::io::parquet::flatbuf::BodyCompressionMethod_BUFFER) +{ + BodyCompressionBuilder builder_(_fbb); + builder_.add_method(method); + builder_.add_codec(codec); + return builder_.Finish(); +} + +/// A data header describing the shared memory layout of a "record" or "row" +/// batch. Some systems call this a "row batch" internally and others a "record +/// batch". +struct RecordBatch FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef RecordBatchBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_LENGTH = 4, + VT_NODES = 6, + VT_BUFFERS = 8, + VT_COMPRESSION = 10, + VT_VARIADICBUFFERCOUNTS = 12 + }; + /// number of records / rows. The arrays in the batch should all have this + /// length + int64_t length() const { return GetField(VT_LENGTH, 0); } + /// Nodes correspond to the pre-ordered flattened logical schema + const ::flatbuffers::Vector* nodes() const + { + return GetPointer*>( + VT_NODES); + } + /// Buffers correspond to the pre-ordered flattened buffer tree + /// + /// The number of buffers appended to this list depends on the schema. For + /// example, most primitive arrays will have 2 buffers, 1 for the validity + /// bitmap and 1 for the values. For struct arrays, there will only be a + /// single buffer for the validity (nulls) bitmap + const ::flatbuffers::Vector* buffers() const + { + return GetPointer*>( + VT_BUFFERS); + } + /// Optional compression of the message body + const cudf::io::parquet::flatbuf::BodyCompression* compression() const + { + return GetPointer(VT_COMPRESSION); + } + /// Some types such as Utf8View are represented using a variable number of buffers. + /// For each such Field in the pre-ordered flattened logical schema, there will be + /// an entry in variadicBufferCounts to indicate the number of number of variadic + /// buffers which belong to that Field in the current RecordBatch. 
+ /// + /// For example, the schema + /// col1: Struct + /// col2: Utf8View + /// contains two Fields with variadic buffers so variadicBufferCounts will have + /// two entries, the first counting the variadic buffers of `col1.beta` and the + /// second counting `col2`'s. + /// + /// This field may be omitted if and only if the schema contains no Fields with + /// a variable number of buffers, such as BinaryView and Utf8View. + const ::flatbuffers::Vector* variadicBufferCounts() const + { + return GetPointer*>(VT_VARIADICBUFFERCOUNTS); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_LENGTH, 8) && + VerifyOffset(verifier, VT_NODES) && verifier.VerifyVector(nodes()) && + VerifyOffset(verifier, VT_BUFFERS) && verifier.VerifyVector(buffers()) && + VerifyOffset(verifier, VT_COMPRESSION) && verifier.VerifyTable(compression()) && + VerifyOffset(verifier, VT_VARIADICBUFFERCOUNTS) && + verifier.VerifyVector(variadicBufferCounts()) && verifier.EndTable(); + } +}; + +struct RecordBatchBuilder { + typedef RecordBatch Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_length(int64_t length) { fbb_.AddElement(RecordBatch::VT_LENGTH, length, 0); } + void add_nodes( + ::flatbuffers::Offset<::flatbuffers::Vector> + nodes) + { + fbb_.AddOffset(RecordBatch::VT_NODES, nodes); + } + void add_buffers( + ::flatbuffers::Offset<::flatbuffers::Vector> buffers) + { + fbb_.AddOffset(RecordBatch::VT_BUFFERS, buffers); + } + void add_compression( + ::flatbuffers::Offset compression) + { + fbb_.AddOffset(RecordBatch::VT_COMPRESSION, compression); + } + void add_variadicBufferCounts( + ::flatbuffers::Offset<::flatbuffers::Vector> variadicBufferCounts) + { + fbb_.AddOffset(RecordBatch::VT_VARIADICBUFFERCOUNTS, variadicBufferCounts); + } + explicit RecordBatchBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateRecordBatch( + ::flatbuffers::FlatBufferBuilder& _fbb, + int64_t length = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> nodes = + 0, + ::flatbuffers::Offset<::flatbuffers::Vector> buffers = + 0, + ::flatbuffers::Offset compression = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> variadicBufferCounts = 0) +{ + RecordBatchBuilder builder_(_fbb); + builder_.add_length(length); + builder_.add_variadicBufferCounts(variadicBufferCounts); + builder_.add_compression(compression); + builder_.add_buffers(buffers); + builder_.add_nodes(nodes); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateRecordBatchDirect( + ::flatbuffers::FlatBufferBuilder& _fbb, + int64_t length = 0, + const std::vector* nodes = nullptr, + const std::vector* buffers = nullptr, + ::flatbuffers::Offset compression = 0, + const std::vector* variadicBufferCounts = nullptr) +{ + auto nodes__ = + nodes ? _fbb.CreateVectorOfStructs(*nodes) : 0; + auto buffers__ = + buffers ? _fbb.CreateVectorOfStructs(*buffers) : 0; + auto variadicBufferCounts__ = + variadicBufferCounts ? _fbb.CreateVector(*variadicBufferCounts) : 0; + return cudf::io::parquet::flatbuf::CreateRecordBatch( + _fbb, length, nodes__, buffers__, compression, variadicBufferCounts__); +} + +/// For sending dictionary encoding information. 
Any Field can be +/// dictionary-encoded, but in this case none of its children may be +/// dictionary-encoded. +/// There is one vector / column per dictionary, but that vector / column +/// may be spread across multiple dictionary batches by using the isDelta +/// flag +struct DictionaryBatch FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DictionaryBatchBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ID = 4, + VT_DATA = 6, + VT_ISDELTA = 8 + }; + int64_t id() const { return GetField(VT_ID, 0); } + const cudf::io::parquet::flatbuf::RecordBatch* data() const + { + return GetPointer(VT_DATA); + } + /// If isDelta is true the values in the dictionary are to be appended to a + /// dictionary with the indicated id. If isDelta is false this dictionary + /// should replace the existing dictionary. + bool isDelta() const { return GetField(VT_ISDELTA, 0) != 0; } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_ID, 8) && + VerifyOffset(verifier, VT_DATA) && verifier.VerifyTable(data()) && + VerifyField(verifier, VT_ISDELTA, 1) && verifier.EndTable(); + } +}; + +struct DictionaryBatchBuilder { + typedef DictionaryBatch Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_id(int64_t id) { fbb_.AddElement(DictionaryBatch::VT_ID, id, 0); } + void add_data(::flatbuffers::Offset data) + { + fbb_.AddOffset(DictionaryBatch::VT_DATA, data); + } + void add_isDelta(bool isDelta) + { + fbb_.AddElement(DictionaryBatch::VT_ISDELTA, static_cast(isDelta), 0); + } + explicit DictionaryBatchBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDictionaryBatch( + ::flatbuffers::FlatBufferBuilder& _fbb, + int64_t id = 0, + ::flatbuffers::Offset data = 0, + bool isDelta = false) +{ + DictionaryBatchBuilder builder_(_fbb); + builder_.add_id(id); + builder_.add_data(data); + builder_.add_isDelta(isDelta); + return builder_.Finish(); +} + +struct Message FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MessageBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VERSION = 4, + VT_HEADER_TYPE = 6, + VT_HEADER = 8, + VT_BODYLENGTH = 10, + VT_CUSTOM_METADATA = 12 + }; + cudf::io::parquet::flatbuf::MetadataVersion version() const + { + return static_cast( + GetField(VT_VERSION, 0)); + } + cudf::io::parquet::flatbuf::MessageHeader header_type() const + { + return static_cast( + GetField(VT_HEADER_TYPE, 0)); + } + const void* header() const { return GetPointer(VT_HEADER); } + template + const T* header_as() const; + const cudf::io::parquet::flatbuf::Schema* header_as_Schema() const + { + return header_type() == cudf::io::parquet::flatbuf::MessageHeader_Schema + ? 
static_cast(header()) + : nullptr; + } + int64_t bodyLength() const { return GetField(VT_BODYLENGTH, 0); } + const ::flatbuffers::Vector<::flatbuffers::Offset>* + custom_metadata() const + { + return GetPointer< + const ::flatbuffers::Vector<::flatbuffers::Offset>*>( + VT_CUSTOM_METADATA); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_VERSION, 2) && + VerifyField(verifier, VT_HEADER_TYPE, 1) && VerifyOffset(verifier, VT_HEADER) && + VerifyMessageHeader(verifier, header(), header_type()) && + VerifyField(verifier, VT_BODYLENGTH, 8) && + VerifyOffset(verifier, VT_CUSTOM_METADATA) && verifier.VerifyVector(custom_metadata()) && + verifier.VerifyVectorOfTables(custom_metadata()) && verifier.EndTable(); + } +}; + +template <> +inline const cudf::io::parquet::flatbuf::Schema* +Message::header_as() const +{ + return header_as_Schema(); +} + +struct MessageBuilder { + typedef Message Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_version(cudf::io::parquet::flatbuf::MetadataVersion version) + { + fbb_.AddElement(Message::VT_VERSION, static_cast(version), 0); + } + void add_header_type(cudf::io::parquet::flatbuf::MessageHeader header_type) + { + fbb_.AddElement(Message::VT_HEADER_TYPE, static_cast(header_type), 0); + } + void add_header(::flatbuffers::Offset header) + { + fbb_.AddOffset(Message::VT_HEADER, header); + } + void add_bodyLength(int64_t bodyLength) + { + fbb_.AddElement(Message::VT_BODYLENGTH, bodyLength, 0); + } + void add_custom_metadata( + ::flatbuffers::Offset< + ::flatbuffers::Vector<::flatbuffers::Offset>> + custom_metadata) + { + fbb_.AddOffset(Message::VT_CUSTOM_METADATA, custom_metadata); + } + explicit MessageBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMessage( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::MetadataVersion version = + cudf::io::parquet::flatbuf::MetadataVersion_V1, + cudf::io::parquet::flatbuf::MessageHeader header_type = + cudf::io::parquet::flatbuf::MessageHeader_NONE, + ::flatbuffers::Offset header = 0, + int64_t bodyLength = 0, + ::flatbuffers::Offset<::flatbuffers::Vector< + ::flatbuffers::Offset>> custom_metadata = 0) +{ + MessageBuilder builder_(_fbb); + builder_.add_bodyLength(bodyLength); + builder_.add_custom_metadata(custom_metadata); + builder_.add_header(header); + builder_.add_version(version); + builder_.add_header_type(header_type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateMessageDirect( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::MetadataVersion version = + cudf::io::parquet::flatbuf::MetadataVersion_V1, + cudf::io::parquet::flatbuf::MessageHeader header_type = + cudf::io::parquet::flatbuf::MessageHeader_NONE, + ::flatbuffers::Offset header = 0, + int64_t bodyLength = 0, + const std::vector<::flatbuffers::Offset>* custom_metadata = + nullptr) +{ + auto custom_metadata__ = + custom_metadata + ? 
_fbb.CreateVector<::flatbuffers::Offset>( + *custom_metadata) + : 0; + return cudf::io::parquet::flatbuf::CreateMessage( + _fbb, version, header_type, header, bodyLength, custom_metadata__); +} + +inline bool VerifyMessageHeader(::flatbuffers::Verifier& verifier, + const void* obj, + MessageHeader type) +{ + switch (type) { + case MessageHeader_NONE: { + return true; + } + case MessageHeader_Schema: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return true; + } +} + +inline bool VerifyMessageHeaderVector( + ::flatbuffers::Verifier& verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset>* values, + const ::flatbuffers::Vector* types) +{ + if (!values || !types) return !values && !types; + if (values->size() != types->size()) return false; + for (::flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifyMessageHeader(verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline const cudf::io::parquet::flatbuf::Message* GetMessage(const void* buf) +{ + return ::flatbuffers::GetRoot(buf); +} + +inline const cudf::io::parquet::flatbuf::Message* GetSizePrefixedMessage(const void* buf) +{ + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline bool VerifyMessageBuffer(::flatbuffers::Verifier& verifier) +{ + return verifier.VerifyBuffer(nullptr); +} + +inline bool VerifySizePrefixedMessageBuffer(::flatbuffers::Verifier& verifier) +{ + return verifier.VerifySizePrefixedBuffer(nullptr); +} + +inline void FinishMessageBuffer(::flatbuffers::FlatBufferBuilder& fbb, + ::flatbuffers::Offset root) +{ + fbb.Finish(root); +} + +inline void FinishSizePrefixedMessageBuffer( + ::flatbuffers::FlatBufferBuilder& fbb, + ::flatbuffers::Offset root) +{ + fbb.FinishSizePrefixed(root); +} + +} // namespace flatbuf +} // namespace parquet +} // namespace io +} // namespace cudf + +#endif // FLATBUFFERS_GENERATED_MESSAGE_CUDF_IO_PARQUET_FLATBUF_H_ diff --git a/cpp/include/cudf/io/ipc/detail/Schema_generated.h b/cpp/include/cudf/io/ipc/detail/Schema_generated.h new file mode 100644 index 00000000000..4d662704604 --- /dev/null +++ b/cpp/include/cudf/io/ipc/detail/Schema_generated.h @@ -0,0 +1,2769 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_SCHEMA_CUDF_IO_PARQUET_FLATBUF_H_ +#define FLATBUFFERS_GENERATED_SCHEMA_CUDF_IO_PARQUET_FLATBUF_H_ + +#include + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. 
+static_assert(FLATBUFFERS_VERSION_MAJOR == 24 && FLATBUFFERS_VERSION_MINOR == 3 && + FLATBUFFERS_VERSION_REVISION == 25, + "Non-compatible flatbuffers version included"); + +namespace cudf { +namespace io { +namespace parquet { +namespace flatbuf { + +struct Null; +struct NullBuilder; + +struct Struct_; +struct Struct_Builder; + +struct List; +struct ListBuilder; + +struct LargeList; +struct LargeListBuilder; + +struct ListView; +struct ListViewBuilder; + +struct LargeListView; +struct LargeListViewBuilder; + +struct FixedSizeList; +struct FixedSizeListBuilder; + +struct Map; +struct MapBuilder; + +struct Union; +struct UnionBuilder; + +struct Int; +struct IntBuilder; + +struct FloatingPoint; +struct FloatingPointBuilder; + +struct Utf8; +struct Utf8Builder; + +struct Binary; +struct BinaryBuilder; + +struct LargeUtf8; +struct LargeUtf8Builder; + +struct LargeBinary; +struct LargeBinaryBuilder; + +struct Utf8View; +struct Utf8ViewBuilder; + +struct BinaryView; +struct BinaryViewBuilder; + +struct FixedSizeBinary; +struct FixedSizeBinaryBuilder; + +struct Bool; +struct BoolBuilder; + +struct RunEndEncoded; +struct RunEndEncodedBuilder; + +struct Decimal; +struct DecimalBuilder; + +struct Date; +struct DateBuilder; + +struct Time; +struct TimeBuilder; + +struct Timestamp; +struct TimestampBuilder; + +struct Interval; +struct IntervalBuilder; + +struct Duration; +struct DurationBuilder; + +struct KeyValue; +struct KeyValueBuilder; + +struct DictionaryEncoding; +struct DictionaryEncodingBuilder; + +struct Field; +struct FieldBuilder; + +struct Buffer; + +struct Schema; +struct SchemaBuilder; + +enum MetadataVersion : int16_t { + /// 0.1.0 (October 2016). + MetadataVersion_V1 = 0, + /// 0.2.0 (February 2017). Non-backwards compatible with V1. + MetadataVersion_V2 = 1, + /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. + MetadataVersion_V3 = 2, + /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. + MetadataVersion_V4 = 3, + /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 + /// metadata and IPC messages). Implementations are recommended to provide a + /// V4 compatibility mode with V5 format changes disabled. + /// + /// Incompatible changes between V4 and V5: + /// - Union buffer layout has changed. In V5, Unions don't have a validity + /// bitmap buffer. + MetadataVersion_V5 = 4, + MetadataVersion_MIN = MetadataVersion_V1, + MetadataVersion_MAX = MetadataVersion_V5 +}; + +inline const MetadataVersion (&EnumValuesMetadataVersion())[5] +{ + static const MetadataVersion values[] = {MetadataVersion_V1, + MetadataVersion_V2, + MetadataVersion_V3, + MetadataVersion_V4, + MetadataVersion_V5}; + return values; +} + +inline const char* const* EnumNamesMetadataVersion() +{ + static const char* const names[6] = {"V1", "V2", "V3", "V4", "V5", nullptr}; + return names; +} + +inline const char* EnumNameMetadataVersion(MetadataVersion e) +{ + if (::flatbuffers::IsOutRange(e, MetadataVersion_V1, MetadataVersion_V5)) return ""; + const size_t index = static_cast(e); + return EnumNamesMetadataVersion()[index]; +} + +/// Represents Arrow Features that might not have full support +/// within implementations. This is intended to be used in +/// two scenarios: +/// 1. A mechanism for readers of Arrow Streams +/// and files to understand that the stream or file makes +/// use of a feature that isn't supported or unknown to +/// the implementation (and therefore can meet the Arrow +/// forward compatibility guarantees). +/// 2. 
A means of negotiating between a client and server +/// what features a stream is allowed to use. The enums +/// values here are intented to represent higher level +/// features, additional details maybe negotiated +/// with key-value pairs specific to the protocol. +/// +/// Enums added to this list should be assigned power-of-two values +/// to facilitate exchanging and comparing bitmaps for supported +/// features. +enum Feature : int64_t { + /// Needed to make flatbuffers happy. + Feature_UNUSED = 0, + /// The stream makes use of multiple full dictionaries with the + /// same ID and assumes clients implement dictionary replacement + /// correctly. + Feature_DICTIONARY_REPLACEMENT = 1LL, + /// The stream makes use of compressed bodies as described + /// in Message.fbs. + Feature_COMPRESSED_BODY = 2LL, + Feature_MIN = Feature_UNUSED, + Feature_MAX = Feature_COMPRESSED_BODY +}; + +inline const Feature (&EnumValuesFeature())[3] +{ + static const Feature values[] = { + Feature_UNUSED, Feature_DICTIONARY_REPLACEMENT, Feature_COMPRESSED_BODY}; + return values; +} + +inline const char* const* EnumNamesFeature() +{ + static const char* const names[4] = { + "UNUSED", "DICTIONARY_REPLACEMENT", "COMPRESSED_BODY", nullptr}; + return names; +} + +inline const char* EnumNameFeature(Feature e) +{ + if (::flatbuffers::IsOutRange(e, Feature_UNUSED, Feature_COMPRESSED_BODY)) return ""; + const size_t index = static_cast(e); + return EnumNamesFeature()[index]; +} + +enum UnionMode : int16_t { + UnionMode_Sparse = 0, + UnionMode_Dense = 1, + UnionMode_MIN = UnionMode_Sparse, + UnionMode_MAX = UnionMode_Dense +}; + +inline const UnionMode (&EnumValuesUnionMode())[2] +{ + static const UnionMode values[] = {UnionMode_Sparse, UnionMode_Dense}; + return values; +} + +inline const char* const* EnumNamesUnionMode() +{ + static const char* const names[3] = {"Sparse", "Dense", nullptr}; + return names; +} + +inline const char* EnumNameUnionMode(UnionMode e) +{ + if (::flatbuffers::IsOutRange(e, UnionMode_Sparse, UnionMode_Dense)) return ""; + const size_t index = static_cast(e); + return EnumNamesUnionMode()[index]; +} + +enum Precision : int16_t { + Precision_HALF = 0, + Precision_SINGLE = 1, + Precision_DOUBLE = 2, + Precision_MIN = Precision_HALF, + Precision_MAX = Precision_DOUBLE +}; + +inline const Precision (&EnumValuesPrecision())[3] +{ + static const Precision values[] = {Precision_HALF, Precision_SINGLE, Precision_DOUBLE}; + return values; +} + +inline const char* const* EnumNamesPrecision() +{ + static const char* const names[4] = {"HALF", "SINGLE", "DOUBLE", nullptr}; + return names; +} + +inline const char* EnumNamePrecision(Precision e) +{ + if (::flatbuffers::IsOutRange(e, Precision_HALF, Precision_DOUBLE)) return ""; + const size_t index = static_cast(e); + return EnumNamesPrecision()[index]; +} + +enum DateUnit : int16_t { + DateUnit_DAY = 0, + DateUnit_MILLISECOND = 1, + DateUnit_MIN = DateUnit_DAY, + DateUnit_MAX = DateUnit_MILLISECOND +}; + +inline const DateUnit (&EnumValuesDateUnit())[2] +{ + static const DateUnit values[] = {DateUnit_DAY, DateUnit_MILLISECOND}; + return values; +} + +inline const char* const* EnumNamesDateUnit() +{ + static const char* const names[3] = {"DAY", "MILLISECOND", nullptr}; + return names; +} + +inline const char* EnumNameDateUnit(DateUnit e) +{ + if (::flatbuffers::IsOutRange(e, DateUnit_DAY, DateUnit_MILLISECOND)) return ""; + const size_t index = static_cast(e); + return EnumNamesDateUnit()[index]; +} + +enum TimeUnit : int16_t { + TimeUnit_SECOND = 0, + 
TimeUnit_MILLISECOND = 1, + TimeUnit_MICROSECOND = 2, + TimeUnit_NANOSECOND = 3, + TimeUnit_MIN = TimeUnit_SECOND, + TimeUnit_MAX = TimeUnit_NANOSECOND +}; + +inline const TimeUnit (&EnumValuesTimeUnit())[4] +{ + static const TimeUnit values[] = { + TimeUnit_SECOND, TimeUnit_MILLISECOND, TimeUnit_MICROSECOND, TimeUnit_NANOSECOND}; + return values; +} + +inline const char* const* EnumNamesTimeUnit() +{ + static const char* const names[5] = { + "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND", nullptr}; + return names; +} + +inline const char* EnumNameTimeUnit(TimeUnit e) +{ + if (::flatbuffers::IsOutRange(e, TimeUnit_SECOND, TimeUnit_NANOSECOND)) return ""; + const size_t index = static_cast(e); + return EnumNamesTimeUnit()[index]; +} + +enum IntervalUnit : int16_t { + IntervalUnit_YEAR_MONTH = 0, + IntervalUnit_DAY_TIME = 1, + IntervalUnit_MONTH_DAY_NANO = 2, + IntervalUnit_MIN = IntervalUnit_YEAR_MONTH, + IntervalUnit_MAX = IntervalUnit_MONTH_DAY_NANO +}; + +inline const IntervalUnit (&EnumValuesIntervalUnit())[3] +{ + static const IntervalUnit values[] = { + IntervalUnit_YEAR_MONTH, IntervalUnit_DAY_TIME, IntervalUnit_MONTH_DAY_NANO}; + return values; +} + +inline const char* const* EnumNamesIntervalUnit() +{ + static const char* const names[4] = {"YEAR_MONTH", "DAY_TIME", "MONTH_DAY_NANO", nullptr}; + return names; +} + +inline const char* EnumNameIntervalUnit(IntervalUnit e) +{ + if (::flatbuffers::IsOutRange(e, IntervalUnit_YEAR_MONTH, IntervalUnit_MONTH_DAY_NANO)) return ""; + const size_t index = static_cast(e); + return EnumNamesIntervalUnit()[index]; +} + +/// ---------------------------------------------------------------------- +/// Top-level Type value, enabling extensible type-specific metadata. We can +/// add new logical types to Type without breaking backwards compatibility +enum Type : uint8_t { + Type_NONE = 0, + Type_Null = 1, + Type_Int = 2, + Type_FloatingPoint = 3, + Type_Binary = 4, + Type_Utf8 = 5, + Type_Bool = 6, + Type_Decimal = 7, + Type_Date = 8, + Type_Time = 9, + Type_Timestamp = 10, + Type_Interval = 11, + Type_List = 12, + Type_Struct_ = 13, + Type_Union = 14, + Type_FixedSizeBinary = 15, + Type_FixedSizeList = 16, + Type_Map = 17, + Type_Duration = 18, + Type_LargeBinary = 19, + Type_LargeUtf8 = 20, + Type_LargeList = 21, + Type_RunEndEncoded = 22, + Type_BinaryView = 23, + Type_Utf8View = 24, + Type_ListView = 25, + Type_LargeListView = 26, + Type_MIN = Type_NONE, + Type_MAX = Type_LargeListView +}; + +inline const Type (&EnumValuesType())[27] +{ + static const Type values[] = { + Type_NONE, Type_Null, Type_Int, Type_FloatingPoint, + Type_Binary, Type_Utf8, Type_Bool, Type_Decimal, + Type_Date, Type_Time, Type_Timestamp, Type_Interval, + Type_List, Type_Struct_, Type_Union, Type_FixedSizeBinary, + Type_FixedSizeList, Type_Map, Type_Duration, Type_LargeBinary, + Type_LargeUtf8, Type_LargeList, Type_RunEndEncoded, Type_BinaryView, + Type_Utf8View, Type_ListView, Type_LargeListView}; + return values; +} + +inline const char* const* EnumNamesType() +{ + static const char* const names[28] = { + "NONE", "Null", "Int", "FloatingPoint", + "Binary", "Utf8", "Bool", "Decimal", + "Date", "Time", "Timestamp", "Interval", + "List", "Struct_", "Union", "FixedSizeBinary", + "FixedSizeList", "Map", "Duration", "LargeBinary", + "LargeUtf8", "LargeList", "RunEndEncoded", "BinaryView", + "Utf8View", "ListView", "LargeListView", nullptr}; + return names; +} + +inline const char* EnumNameType(Type e) +{ + if (::flatbuffers::IsOutRange(e, Type_NONE, Type_LargeListView)) 
return ""; + const size_t index = static_cast(e); + return EnumNamesType()[index]; +} + +template +struct TypeTraits { + static const Type enum_value = Type_NONE; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Null; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Int; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_FloatingPoint; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Binary; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Utf8; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Bool; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Decimal; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Date; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Time; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Timestamp; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Interval; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_List; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Struct_; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Union; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_FixedSizeBinary; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_FixedSizeList; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Map; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Duration; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_LargeBinary; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_LargeUtf8; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_LargeList; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_RunEndEncoded; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_BinaryView; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_Utf8View; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_ListView; +}; + +template <> +struct TypeTraits { + static const Type enum_value = Type_LargeListView; +}; + +bool VerifyType(::flatbuffers::Verifier& verifier, const void* obj, Type type); +bool VerifyTypeVector(::flatbuffers::Verifier& verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset>* values, + const ::flatbuffers::Vector* types); + +/// ---------------------------------------------------------------------- +/// Dictionary encoding metadata +/// Maintained for forwards compatibility, in the future +/// Dictionaries might be explicit maps between integers and values +/// allowing for non-contiguous index values +enum DictionaryKind : int16_t { + DictionaryKind_DenseArray = 0, + DictionaryKind_MIN = DictionaryKind_DenseArray, + DictionaryKind_MAX = DictionaryKind_DenseArray +}; + +inline const DictionaryKind (&EnumValuesDictionaryKind())[1] +{ + static const DictionaryKind values[] = {DictionaryKind_DenseArray}; + return values; +} + +inline const char* const* EnumNamesDictionaryKind() +{ + static const char* const names[2] = {"DenseArray", nullptr}; + return names; +} + +inline const char* EnumNameDictionaryKind(DictionaryKind e) +{ + if (::flatbuffers::IsOutRange(e, 
DictionaryKind_DenseArray, DictionaryKind_DenseArray)) return ""; + const size_t index = static_cast(e); + return EnumNamesDictionaryKind()[index]; +} + +/// ---------------------------------------------------------------------- +/// Endianness of the platform producing the data +enum Endianness : int16_t { + Endianness_Little = 0, + Endianness_Big = 1, + Endianness_MIN = Endianness_Little, + Endianness_MAX = Endianness_Big +}; + +inline const Endianness (&EnumValuesEndianness())[2] +{ + static const Endianness values[] = {Endianness_Little, Endianness_Big}; + return values; +} + +inline const char* const* EnumNamesEndianness() +{ + static const char* const names[3] = {"Little", "Big", nullptr}; + return names; +} + +inline const char* EnumNameEndianness(Endianness e) +{ + if (::flatbuffers::IsOutRange(e, Endianness_Little, Endianness_Big)) return ""; + const size_t index = static_cast(e); + return EnumNamesEndianness()[index]; +} + +/// ---------------------------------------------------------------------- +/// A Buffer represents a single contiguous memory segment +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Buffer FLATBUFFERS_FINAL_CLASS +{ + private: + int64_t offset_; + int64_t length_; + + public: + Buffer() : offset_(0), length_(0) {} + Buffer(int64_t _offset, int64_t _length) + : offset_(::flatbuffers::EndianScalar(_offset)), length_(::flatbuffers::EndianScalar(_length)) + { + } + /// The relative offset into the shared memory page where the bytes for this + /// buffer starts + int64_t offset() const { return ::flatbuffers::EndianScalar(offset_); } + /// The absolute length (in bytes) of the memory buffer. The memory is found + /// from offset (inclusive) to offset + length (non-inclusive). When building + /// messages using the encapsulated IPC message, padding bytes may be written + /// after a buffer, but such padding bytes do not need to be accounted for in + /// the size here. + int64_t length() const { return ::flatbuffers::EndianScalar(length_); } +}; +FLATBUFFERS_STRUCT_END(Buffer, 16); + +/// These are stored in the flatbuffer in the Type union below +struct Null FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef NullBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct NullBuilder { + typedef Null Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit NullBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateNull(::flatbuffers::FlatBufferBuilder& _fbb) +{ + NullBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +/// (according to the physical memory layout). 
We used Struct_ here as +/// Struct is a reserved word in Flatbuffers +struct Struct_ FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Struct_Builder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct Struct_Builder { + typedef Struct_ Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit Struct_Builder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateStruct_(::flatbuffers::FlatBufferBuilder& _fbb) +{ + Struct_Builder builder_(_fbb); + return builder_.Finish(); +} + +struct List FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ListBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ListBuilder { + typedef List Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit ListBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateList(::flatbuffers::FlatBufferBuilder& _fbb) +{ + ListBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// Same as List, but with 64-bit offsets, allowing to represent +/// extremely large data values. +struct LargeList FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LargeListBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LargeListBuilder { + typedef LargeList Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit LargeListBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLargeList(::flatbuffers::FlatBufferBuilder& _fbb) +{ + LargeListBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// Represents the same logical types that List can, but contains offsets and +/// sizes allowing for writes in any order and sharing of child values among +/// list values. +struct ListView FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ListViewBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct ListViewBuilder { + typedef ListView Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit ListViewBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateListView(::flatbuffers::FlatBufferBuilder& _fbb) +{ + ListViewBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent +/// extremely large data values. 
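Each parameterless type table above (Null, Struct_, List, LargeList, ListView, and those that follow) exposes the same three-part pattern: a read-only table with Verify, a Builder wrapping StartTable/EndTable, and a CreateX convenience function. A minimal sketch of producing one such type table is below; the offset it returns is only meaningful inside the builder that created it.

::flatbuffers::FlatBufferBuilder fbb;

// CreateList starts a table, finishes it, and returns its offset within fbb.
::flatbuffers::Offset<cudf::io::parquet::flatbuf::List> list_type =
  cudf::io::parquet::flatbuf::CreateList(fbb);

// In a full schema, this offset would be referenced from a Field
// (forward-declared above) together with the Type_List tag.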
+struct LargeListView FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LargeListViewBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LargeListViewBuilder { + typedef LargeListView Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit LargeListViewBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLargeListView( + ::flatbuffers::FlatBufferBuilder& _fbb) +{ + LargeListViewBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct FixedSizeList FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FixedSizeListBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_LISTSIZE = 4 }; + /// Number of list items per value + int32_t listSize() const { return GetField(VT_LISTSIZE, 0); } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_LISTSIZE, 4) && + verifier.EndTable(); + } +}; + +struct FixedSizeListBuilder { + typedef FixedSizeList Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_listSize(int32_t listSize) + { + fbb_.AddElement(FixedSizeList::VT_LISTSIZE, listSize, 0); + } + explicit FixedSizeListBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFixedSizeList( + ::flatbuffers::FlatBufferBuilder& _fbb, int32_t listSize = 0) +{ + FixedSizeListBuilder builder_(_fbb); + builder_.add_listSize(listSize); + return builder_.Finish(); +} + +/// A Map is a logical nested type that is represented as +/// +/// List> +/// +/// In this layout, the keys and values are each respectively contiguous. We do +/// not constrain the key and value types, so the application is responsible +/// for ensuring that the keys are hashable and unique. Whether the keys are sorted +/// may be set in the metadata for this field. +/// +/// In a field with Map type, the field has a child Struct field, which then +/// has two children: key type and the second the value type. The names of the +/// child fields may be respectively "entries", "key", and "value", but this is +/// not enforced. +/// +/// Map +/// ```text +/// - child[0] entries: Struct +/// - child[0] key: K +/// - child[1] value: V +/// ``` +/// Neither the "entries" field nor the "key" field may be nullable. +/// +/// The metadata is structured so that Arrow systems without special handling +/// for Map can make Map an alias for List. The "layout" attribute for the Map +/// field must have the same contents as a List. 
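The Map comment above describes structure that lives in a Field's children rather than in the Map table itself; the table only records whether keys are sorted. A short sketch follows, with the conventional (not enforced) child layout repeated as a comment for orientation.

::flatbuffers::FlatBufferBuilder fbb;
auto map_type = cudf::io::parquet::flatbuf::CreateMap(fbb, /*keysSorted=*/false);

// A Map-typed field then nests its children as:
//   some_map: Map
//     entries: Struct_        (child[0], non-nullable)
//       key:   K              (non-nullable)
//       value: V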
+struct Map FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MapBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_KEYSSORTED = 4 }; + /// Set to true if the keys within each value are sorted + bool keysSorted() const { return GetField(VT_KEYSSORTED, 0) != 0; } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_KEYSSORTED, 1) && + verifier.EndTable(); + } +}; + +struct MapBuilder { + typedef Map Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_keysSorted(bool keysSorted) + { + fbb_.AddElement(Map::VT_KEYSSORTED, static_cast(keysSorted), 0); + } + explicit MapBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMap(::flatbuffers::FlatBufferBuilder& _fbb, + bool keysSorted = false) +{ + MapBuilder builder_(_fbb); + builder_.add_keysSorted(keysSorted); + return builder_.Finish(); +} + +/// A union is a complex type with children in Field +/// By default ids in the type vector refer to the offsets in the children +/// optionally typeIds provides an indirection between the child offset and the type id +/// for each child `typeIds[offset]` is the id used in the type vector +struct Union FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef UnionBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_MODE = 4, VT_TYPEIDS = 6 }; + cudf::io::parquet::flatbuf::UnionMode mode() const + { + return static_cast(GetField(VT_MODE, 0)); + } + const ::flatbuffers::Vector* typeIds() const + { + return GetPointer*>(VT_TYPEIDS); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_MODE, 2) && + VerifyOffset(verifier, VT_TYPEIDS) && verifier.VerifyVector(typeIds()) && + verifier.EndTable(); + } +}; + +struct UnionBuilder { + typedef Union Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_mode(cudf::io::parquet::flatbuf::UnionMode mode) + { + fbb_.AddElement(Union::VT_MODE, static_cast(mode), 0); + } + void add_typeIds(::flatbuffers::Offset<::flatbuffers::Vector> typeIds) + { + fbb_.AddOffset(Union::VT_TYPEIDS, typeIds); + } + explicit UnionBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUnion( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::UnionMode mode = cudf::io::parquet::flatbuf::UnionMode_Sparse, + ::flatbuffers::Offset<::flatbuffers::Vector> typeIds = 0) +{ + UnionBuilder builder_(_fbb); + builder_.add_typeIds(typeIds); + builder_.add_mode(mode); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateUnionDirect( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::UnionMode mode = cudf::io::parquet::flatbuf::UnionMode_Sparse, + const std::vector* typeIds = nullptr) +{ + auto typeIds__ = typeIds ? 
_fbb.CreateVector(*typeIds) : 0; + return cudf::io::parquet::flatbuf::CreateUnion(_fbb, mode, typeIds__); +} + +struct Int FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef IntBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BITWIDTH = 4, + VT_IS_SIGNED = 6 + }; + int32_t bitWidth() const { return GetField(VT_BITWIDTH, 0); } + bool is_signed() const { return GetField(VT_IS_SIGNED, 0) != 0; } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_BITWIDTH, 4) && + VerifyField(verifier, VT_IS_SIGNED, 1) && verifier.EndTable(); + } +}; + +struct IntBuilder { + typedef Int Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_bitWidth(int32_t bitWidth) { fbb_.AddElement(Int::VT_BITWIDTH, bitWidth, 0); } + void add_is_signed(bool is_signed) + { + fbb_.AddElement(Int::VT_IS_SIGNED, static_cast(is_signed), 0); + } + explicit IntBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateInt(::flatbuffers::FlatBufferBuilder& _fbb, + int32_t bitWidth = 0, + bool is_signed = false) +{ + IntBuilder builder_(_fbb); + builder_.add_bitWidth(bitWidth); + builder_.add_is_signed(is_signed); + return builder_.Finish(); +} + +struct FloatingPoint FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FloatingPointBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_PRECISION = 4 }; + cudf::io::parquet::flatbuf::Precision precision() const + { + return static_cast(GetField(VT_PRECISION, 0)); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_PRECISION, 2) && + verifier.EndTable(); + } +}; + +struct FloatingPointBuilder { + typedef FloatingPoint Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_precision(cudf::io::parquet::flatbuf::Precision precision) + { + fbb_.AddElement(FloatingPoint::VT_PRECISION, static_cast(precision), 0); + } + explicit FloatingPointBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFloatingPoint( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::Precision precision = cudf::io::parquet::flatbuf::Precision_HALF) +{ + FloatingPointBuilder builder_(_fbb); + builder_.add_precision(precision); + return builder_.Finish(); +} + +/// Unicode with UTF-8 encoding +struct Utf8 FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Utf8Builder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct Utf8Builder { + typedef Utf8 Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit Utf8Builder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset 
CreateUtf8(::flatbuffers::FlatBufferBuilder& _fbb) +{ + Utf8Builder builder_(_fbb); + return builder_.Finish(); +} + +/// Opaque binary data +struct Binary FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BinaryBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct BinaryBuilder { + typedef Binary Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit BinaryBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBinary(::flatbuffers::FlatBufferBuilder& _fbb) +{ + BinaryBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// Same as Utf8, but with 64-bit offsets, allowing to represent +/// extremely large data values. +struct LargeUtf8 FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LargeUtf8Builder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LargeUtf8Builder { + typedef LargeUtf8 Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit LargeUtf8Builder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLargeUtf8(::flatbuffers::FlatBufferBuilder& _fbb) +{ + LargeUtf8Builder builder_(_fbb); + return builder_.Finish(); +} + +/// Same as Binary, but with 64-bit offsets, allowing to represent +/// extremely large data values. +struct LargeBinary FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LargeBinaryBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct LargeBinaryBuilder { + typedef LargeBinary Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit LargeBinaryBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLargeBinary(::flatbuffers::FlatBufferBuilder& _fbb) +{ + LargeBinaryBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// Logically the same as Utf8, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. 
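The "view struct" mentioned above has a fixed 16-byte layout in the Arrow columnar format; the sketch below is for orientation only and is not part of this header. Strings of at most 12 bytes are stored entirely inline, while longer ones keep a 4-byte prefix plus an index into one of the variadic data buffers and an offset within it.

#include <cstdint>

struct string_view_layout_sketch {
  int32_t length;            // number of bytes in the string
  union payload_t {
    char inlined[12];        // used when length <= 12
    struct ref_t {
      char prefix[4];        // first four bytes, for fast comparisons
      int32_t buffer_index;  // which variadic data buffer holds the string
      int32_t offset;        // byte offset of the string within that buffer
    } ref;
  } payload;
};
static_assert(sizeof(string_view_layout_sketch) == 16, "views are 16 bytes");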
+struct Utf8View FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Utf8ViewBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct Utf8ViewBuilder { + typedef Utf8View Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit Utf8ViewBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUtf8View(::flatbuffers::FlatBufferBuilder& _fbb) +{ + Utf8ViewBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// Logically the same as Binary, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +struct BinaryView FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BinaryViewBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct BinaryViewBuilder { + typedef BinaryView Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit BinaryViewBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBinaryView(::flatbuffers::FlatBufferBuilder& _fbb) +{ + BinaryViewBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct FixedSizeBinary FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FixedSizeBinaryBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BYTEWIDTH = 4 }; + /// Number of bytes per value + int32_t byteWidth() const { return GetField(VT_BYTEWIDTH, 0); } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_BYTEWIDTH, 4) && + verifier.EndTable(); + } +}; + +struct FixedSizeBinaryBuilder { + typedef FixedSizeBinary Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_byteWidth(int32_t byteWidth) + { + fbb_.AddElement(FixedSizeBinary::VT_BYTEWIDTH, byteWidth, 0); + } + explicit FixedSizeBinaryBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFixedSizeBinary( + ::flatbuffers::FlatBufferBuilder& _fbb, int32_t byteWidth = 0) +{ + FixedSizeBinaryBuilder builder_(_fbb); + builder_.add_byteWidth(byteWidth); + return builder_.Finish(); +} + +struct Bool FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BoolBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct BoolBuilder { + typedef Bool Table; + 
::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit BoolBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBool(::flatbuffers::FlatBufferBuilder& _fbb) +{ + BoolBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// Contains two child arrays, run_ends and values. +/// The run_ends child array must be a 16/32/64-bit integer array +/// which encodes the indices at which the run with the value in +/// each corresponding index in the values child array ends. +/// Like list/struct types, the value array can be of any type. +struct RunEndEncoded FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef RunEndEncodedBuilder Builder; + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && verifier.EndTable(); + } +}; + +struct RunEndEncodedBuilder { + typedef RunEndEncoded Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + explicit RunEndEncodedBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateRunEndEncoded( + ::flatbuffers::FlatBufferBuilder& _fbb) +{ + RunEndEncodedBuilder builder_(_fbb); + return builder_.Finish(); +} + +/// Exact decimal value represented as an integer value in two's +/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +/// are used. The representation uses the endianness indicated +/// in the Schema. +struct Decimal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DecimalBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PRECISION = 4, + VT_SCALE = 6, + VT_BITWIDTH = 8 + }; + /// Total number of decimal digits + int32_t precision() const { return GetField(VT_PRECISION, 0); } + /// Number of digits after the decimal point "." + int32_t scale() const { return GetField(VT_SCALE, 0); } + /// Number of bits per value. The only accepted widths are 128 and 256. + /// We use bitWidth for consistency with Int::bitWidth. 
+ int32_t bitWidth() const { return GetField(VT_BITWIDTH, 128); } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_PRECISION, 4) && + VerifyField(verifier, VT_SCALE, 4) && + VerifyField(verifier, VT_BITWIDTH, 4) && verifier.EndTable(); + } +}; + +struct DecimalBuilder { + typedef Decimal Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_precision(int32_t precision) + { + fbb_.AddElement(Decimal::VT_PRECISION, precision, 0); + } + void add_scale(int32_t scale) { fbb_.AddElement(Decimal::VT_SCALE, scale, 0); } + void add_bitWidth(int32_t bitWidth) + { + fbb_.AddElement(Decimal::VT_BITWIDTH, bitWidth, 128); + } + explicit DecimalBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDecimal(::flatbuffers::FlatBufferBuilder& _fbb, + int32_t precision = 0, + int32_t scale = 0, + int32_t bitWidth = 128) +{ + DecimalBuilder builder_(_fbb); + builder_.add_bitWidth(bitWidth); + builder_.add_scale(scale); + builder_.add_precision(precision); + return builder_.Finish(); +} + +/// Date is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units: +/// +/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +/// leap seconds), where the values are evenly divisible by 86400000 +/// * Days (32 bits) since the UNIX epoch +struct Date FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DateBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_UNIT = 4 }; + cudf::io::parquet::flatbuf::DateUnit unit() const + { + return static_cast(GetField(VT_UNIT, 1)); + } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_UNIT, 2) && + verifier.EndTable(); + } +}; + +struct DateBuilder { + typedef Date Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_unit(cudf::io::parquet::flatbuf::DateUnit unit) + { + fbb_.AddElement(Date::VT_UNIT, static_cast(unit), 1); + } + explicit DateBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDate( + ::flatbuffers::FlatBufferBuilder& _fbb, + cudf::io::parquet::flatbuf::DateUnit unit = cudf::io::parquet::flatbuf::DateUnit_MILLISECOND) +{ + DateBuilder builder_(_fbb); + builder_.add_unit(unit); + return builder_.Finish(); +} + +/// Time is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since midnight, stored in either of four units: seconds, +/// milliseconds, microseconds or nanoseconds. +/// +/// The integer `bitWidth` depends on the `unit` and must be one of the following: +/// * SECOND and MILLISECOND: 32 bits +/// * MICROSECOND and NANOSECOND: 64 bits +/// +/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds +/// (exclusive), adjusted for the time unit (for example, up to 86400000 +/// exclusive for the MILLISECOND unit). +/// This definition doesn't allow for leap seconds. 
Time values from +/// measurements with leap seconds will need to be corrected when ingesting +/// into Arrow (for example by replacing the value 86400 with 86399). +struct Time FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TimeBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_UNIT = 4, VT_BITWIDTH = 6 }; + cudf::io::parquet::flatbuf::TimeUnit unit() const + { + return static_cast(GetField(VT_UNIT, 1)); + } + int32_t bitWidth() const { return GetField(VT_BITWIDTH, 32); } + bool Verify(::flatbuffers::Verifier& verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_UNIT, 2) && + VerifyField(verifier, VT_BITWIDTH, 4) && verifier.EndTable(); + } +}; + +struct TimeBuilder { + typedef Time Table; + ::flatbuffers::FlatBufferBuilder& fbb_; + ::flatbuffers::uoffset_t start_; + void add_unit(cudf::io::parquet::flatbuf::TimeUnit unit) + { + fbb_.AddElement(Time::VT_UNIT, static_cast(unit), 1); + } + void add_bitWidth(int32_t bitWidth) { fbb_.AddElement(Time::VT_BITWIDTH, bitWidth, 32); } + explicit TimeBuilder(::flatbuffers::FlatBufferBuilder& _fbb) : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset