diff --git a/CMakeLists.txt b/CMakeLists.txt index 48f2cd96..e375e3fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,6 +131,7 @@ set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/details/3rdparty/float16_t.hpp # layout ${SPARROW_INCLUDE_DIR}/sparrow/layout/array_base.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/layout/array_wrapper.hpp ${SPARROW_INCLUDE_DIR}/sparrow/layout/dictionary_encoded_array.hpp ${SPARROW_INCLUDE_DIR}/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp ${SPARROW_INCLUDE_DIR}/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_iterator.hpp diff --git a/include/sparrow/array_factory.hpp b/include/sparrow/array_factory.hpp index 51351387..4672d9dd 100644 --- a/include/sparrow/array_factory.hpp +++ b/include/sparrow/array_factory.hpp @@ -18,11 +18,11 @@ #include "sparrow/arrow_array_schema_proxy.hpp" #include "sparrow/config/config.hpp" -#include "sparrow/layout/array_base.hpp" +#include "sparrow/layout/array_wrapper.hpp" #include "sparrow/utils/memory.hpp" namespace sparrow { - SPARROW_API cloning_ptr array_factory(arrow_proxy proxy); + SPARROW_API cloning_ptr array_factory(arrow_proxy proxy); } diff --git a/include/sparrow/layout/array_base.hpp b/include/sparrow/layout/array_base.hpp index ce316883..513cb96f 100644 --- a/include/sparrow/layout/array_base.hpp +++ b/include/sparrow/layout/array_base.hpp @@ -36,34 +36,6 @@ namespace sparrow return {arrow_proxy.buffers()[bitmap_buffer_index].data(), bitmap_size}; } - /** - * Base class for array type erasure - */ - class array_base - { - public: - - virtual ~array_base() = default; - - array_base(array_base&&) = delete; - array_base& operator=(const array_base&) = delete; - array_base& operator=(array_base&&) = delete; - - array_base* clone() const; - - enum data_type data_type() const; - - protected: - - array_base(enum data_type dt); - array_base(const array_base&) = default; - - private: - - enum data_type m_data_type; - virtual array_base* 
clone_impl() const = 0; - }; - /** * Base class for array_inner_types specialization * @@ -167,24 +139,8 @@ namespace sparrow friend class layout_iterator; }; - /***************************** - * array_base implementation * - *****************************/ - - inline array_base* array_base::clone() const - { - return clone_impl(); - } - - inline enum data_type array_base::data_type() const - { - return m_data_type; - } - - inline array_base::array_base(enum data_type dt) - : m_data_type(dt) - { - } + template + bool operator==(const array_crtp_base& lhs, const array_crtp_base& rhs); /********************************** * array_crtp_base implementation * @@ -335,4 +291,10 @@ namespace sparrow { return *static_cast(this); } + + template + bool operator==(const array_crtp_base& lhs, const array_crtp_base& rhs) + { + return std::ranges::equal(lhs, rhs); + } } diff --git a/include/sparrow/layout/array_wrapper.hpp b/include/sparrow/layout/array_wrapper.hpp new file mode 100644 index 00000000..ad59630d --- /dev/null +++ b/include/sparrow/layout/array_wrapper.hpp @@ -0,0 +1,187 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include + +#include "sparrow/types/data_traits.hpp" +#include "sparrow/utils/memory.hpp" + +namespace sparrow +{ + /** + * Base class for array type erasure + */ + class array_wrapper + { + public: + + using wrapper_ptr = std::unique_ptr; + + virtual ~array_wrapper() = default; + + array_wrapper(array_wrapper&&) = delete; + array_wrapper& operator=(const array_wrapper&) = delete; + array_wrapper& operator=(array_wrapper&&) = delete; + + wrapper_ptr clone() const; + + enum data_type data_type() const; + + protected: + + array_wrapper(enum data_type dt); + array_wrapper(const array_wrapper&) = default; + + private: + + enum data_type m_data_type; + virtual wrapper_ptr clone_impl() const = 0; + }; + + template + class array_wrapper_impl : public array_wrapper + { + public: + + array_wrapper_impl(T&& ar); + array_wrapper_impl(T* ar); + array_wrapper_impl(std::shared_ptr ar); + + virtual ~array_wrapper_impl() = default; + + T& get_wrapped(); + const T& get_wrapped() const; + + private: + + using wrapper_ptr = array_wrapper::wrapper_ptr; + + constexpr enum data_type get_data_type() const; + + array_wrapper_impl(const array_wrapper_impl&); + wrapper_ptr clone_impl() const override; + + using storage_type = std::variant, std::shared_ptr, T*>; + storage_type m_storage; + T* p_array; + }; + + template + T& unwrap_array(array_wrapper&); + + template + const T& unwrap_array(const array_wrapper&); + + /******************************** + * array_wrapper implementation * + ********************************/ + + inline auto array_wrapper::clone() const -> wrapper_ptr + { + return clone_impl(); + } + + inline enum data_type array_wrapper::data_type() const + { + return m_data_type; + } + + inline array_wrapper::array_wrapper(enum data_type dt) + : m_data_type(dt) + { + } + + /************************************* + * array_wrapper_impl implementation * + *************************************/ + + template + array_wrapper_impl::array_wrapper_impl(T&& ar) 
+ : array_wrapper(this->get_data_type()) + , m_storage(value_ptr(std::move(ar))) + , p_array(std::get>(m_storage).get()) + { + } + + template + array_wrapper_impl::array_wrapper_impl(T* ar) + : array_wrapper(this->get_data_type()) + , m_storage(ar) + , p_array(ar) + { + } + + template + array_wrapper_impl::array_wrapper_impl(std::shared_ptr ar) + : array_wrapper(this->get_data_type()) + , m_storage(ar) + , p_array(ar.get()) + { + } + + template + T& array_wrapper_impl::get_wrapped() + { + return *p_array; + } + + template + const T& array_wrapper_impl::get_wrapped() const + { + return *p_array; + } + + template + constexpr enum data_type array_wrapper_impl::get_data_type() const + { + return arrow_traits::type_id; + } + + template + array_wrapper_impl::array_wrapper_impl(const array_wrapper_impl& rhs) + : array_wrapper(rhs) + , m_storage(rhs.m_storage) + { + p_array = std::visit([](auto&& arg) + { + using U = std::decay_t; + if constexpr (std::is_same_v) + return arg; + else + return arg.get(); + }, m_storage); + } + + template + auto array_wrapper_impl::clone_impl() const -> wrapper_ptr + { + return wrapper_ptr{new array_wrapper_impl(*this)}; + } + + template + T& unwrap_array(array_wrapper& ar) + { + return static_cast&>(ar).get_wrapped(); + } + + template + const T& unwrap_array(const array_wrapper& ar) + { + return static_cast&>(ar).get_wrapped(); + } +} + diff --git a/include/sparrow/layout/dictionary_encoded_array.hpp b/include/sparrow/layout/dictionary_encoded_array.hpp index c3722db7..cb8f395d 100644 --- a/include/sparrow/layout/dictionary_encoded_array.hpp +++ b/include/sparrow/layout/dictionary_encoded_array.hpp @@ -16,6 +16,7 @@ #include "sparrow/arrow_array_schema_proxy.hpp" #include "sparrow/layout/array_base.hpp" +#include "sparrow/layout/array_wrapper.hpp" #include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp" #include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_iterator.hpp" #include 
"sparrow/layout/primitive_array.hpp" @@ -80,8 +81,7 @@ namespace sparrow }; template - class dictionary_encoded_array final : public array_base, - public array_crtp_base> + class dictionary_encoded_array final : public array_crtp_base> { public: @@ -109,7 +109,6 @@ namespace sparrow using const_bitmap_range = typename base_type::const_bitmap_range; explicit dictionary_encoded_array(arrow_proxy); - ~dictionary_encoded_array() override = default; using base_type::size; @@ -132,9 +131,6 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; - dictionary_encoded_array(const dictionary_encoded_array&) = default; - dictionary_encoded_array* clone_impl() const override; - keys_layout m_keys_layout; values_layout m_values_layout; @@ -156,8 +152,7 @@ namespace sparrow template dictionary_encoded_array::dictionary_encoded_array(arrow_proxy proxy) - : array_base(proxy.data_type()) - , base_type(std::move(proxy)) + : base_type(std::move(proxy)) , m_keys_layout(create_keys_layout(storage())) , m_values_layout(create_values_layout(storage())) { @@ -227,13 +222,6 @@ namespace sparrow return sparrow::next(value_cbegin(), size()); } - template - dictionary_encoded_array* dictionary_encoded_array::clone_impl() const - { - arrow_proxy copy = storage(); - return new dictionary_encoded_array(std::move(copy)); - } - template typename dictionary_encoded_array::values_layout dictionary_encoded_array::create_values_layout(arrow_proxy& proxy) diff --git a/include/sparrow/layout/dispatch.hpp b/include/sparrow/layout/dispatch.hpp index 61c7c9fd..0467194f 100644 --- a/include/sparrow/layout/dispatch.hpp +++ b/include/sparrow/layout/dispatch.hpp @@ -16,7 +16,7 @@ #include -#include "sparrow/layout/array_base.hpp" +#include "sparrow/layout/array_wrapper.hpp" #include "sparrow/layout/null_array.hpp" #include "sparrow/layout/primitive_array.hpp" #include "sparrow/layout/nested_value_types.hpp" @@ -28,57 +28,57 @@ namespace sparrow using 
visit_result_t = std::invoke_result_t; template - visit_result_t visit(F&& func, const array_base& ar); + visit_result_t visit(F&& func, const array_wrapper& ar); - std::size_t array_size(const array_base& ar); - array_traits::const_reference array_element(const array_base& ar, std::size_t index); + std::size_t array_size(const array_wrapper& ar); + array_traits::const_reference array_element(const array_wrapper& ar, std::size_t index); /****************** * Implementation * ******************/ template - visit_result_t visit(F&& func, const array_base& ar) + visit_result_t visit(F&& func, const array_wrapper& ar) { switch(ar.data_type()) { case data_type::NA: - return func(static_cast(ar));; + return func(unwrap_array(ar));; case data_type::BOOL: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::UINT8: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::INT8: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::UINT16: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::INT16: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::UINT32: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::INT32: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::UINT64: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::INT64: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::HALF_FLOAT: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::FLOAT: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); case data_type::DOUBLE: - return func(static_cast&>(ar)); + return func(unwrap_array>(ar)); default: throw std::invalid_argument("array type not supported"); } } - inline std::size_t array_size(const array_base& ar) + inline std::size_t array_size(const 
array_wrapper& ar) { return visit([](const auto& impl) { return impl.size(); }, ar); } - inline array_traits::const_reference array_element(const array_base& ar, std::size_t index) + inline array_traits::const_reference array_element(const array_wrapper& ar, std::size_t index) { using return_type = array_traits::const_reference; return visit([index](const auto& impl) -> return_type { return return_type(impl[index]); }, ar); diff --git a/include/sparrow/layout/list_layout/list_array.hpp b/include/sparrow/layout/list_layout/list_array.hpp index 79c45322..0c2e02a5 100644 --- a/include/sparrow/layout/list_layout/list_array.hpp +++ b/include/sparrow/layout/list_layout/list_array.hpp @@ -18,6 +18,7 @@ #include "sparrow/array_factory.hpp" #include "sparrow/layout/array_base.hpp" +#include "sparrow/layout/array_wrapper.hpp" #include "sparrow/layout/layout_utils.hpp" #include "sparrow/layout/nested_value_types.hpp" #include "sparrow/types/data_traits.hpp" @@ -99,8 +100,7 @@ namespace sparrow // - big-list-view-array // - fixed-size-list-array template - class list_array_crtp_base : public array_base, - public array_crtp_base + class list_array_crtp_base : public array_crtp_base { public: @@ -122,19 +122,15 @@ namespace sparrow using inner_reference = list_value; using inner_const_reference = list_value; - using value_type = nullable; using reference = nullable; using const_reference = nullable; - using iterator_tag = std::contiguous_iterator_tag; - + using iterator_tag = typename base_type::iterator_tag; explicit list_array_crtp_base(arrow_proxy proxy); - virtual ~list_array_crtp_base() = default; - list_array_crtp_base(const list_array_crtp_base& rhs) = default; - list_array_crtp_base* clone_impl() const override; - const array_base* raw_flat_array() const; - array_base* raw_flat_array(); + + const array_wrapper* raw_flat_array() const; + array_wrapper* raw_flat_array(); private: @@ -152,7 +148,7 @@ namespace sparrow bitmap_type::const_iterator bitmap_begin_impl() const; 
// data members - cloning_ptr p_flat_array; + cloning_ptr p_flat_array; bitmap_type m_bitmap; // friend classes @@ -182,10 +178,11 @@ namespace sparrow static constexpr std::size_t OFFSET_BUFFER_INDEX = 1; std::pair offset_range(size_type i) const; + offset_type* p_list_offsets; + // friend classes friend class array_crtp_base; friend class list_array_crtp_base; - offset_type* p_list_offsets; }; template @@ -208,11 +205,12 @@ namespace sparrow static constexpr std::size_t SIZES_BUFFER_INDEX = 2; std::pair offset_range(size_type i) const; + offset_type* p_list_offsets; + offset_type* p_list_sizes; + // friend classes friend class array_crtp_base; friend class list_array_crtp_base; - offset_type* p_list_offsets; - offset_type* p_list_sizes; }; class fixed_sized_list_array final : public list_array_crtp_base @@ -242,27 +240,20 @@ namespace sparrow template list_array_crtp_base::list_array_crtp_base(arrow_proxy proxy) - : array_base(proxy.data_type()) - , base_type(std::move(proxy)) + : base_type(std::move(proxy)) , p_flat_array(std::move(array_factory(this->storage().children()[0].view()))) , m_bitmap(make_simple_bitmap(this->storage())) { } template - auto list_array_crtp_base::clone_impl() const -> list_array_crtp_base* - { - return new list_array_crtp_base(*this); - } - - template - auto list_array_crtp_base::raw_flat_array() const -> const array_base* + auto list_array_crtp_base::raw_flat_array() const -> const array_wrapper* { return p_flat_array.get(); } template - auto list_array_crtp_base::raw_flat_array() -> array_base* + auto list_array_crtp_base::raw_flat_array() -> array_wrapper* { return p_flat_array.get(); } diff --git a/include/sparrow/layout/list_layout/list_value.hpp b/include/sparrow/layout/list_layout/list_value.hpp index 8ac4da5f..6a8f1021 100644 --- a/include/sparrow/layout/list_layout/list_value.hpp +++ b/include/sparrow/layout/list_layout/list_value.hpp @@ -15,7 +15,7 @@ #pragma once #include "sparrow/config/config.hpp" -#include 
"sparrow/layout/array_base.hpp" +#include "sparrow/layout/array_wrapper.hpp" #include "sparrow/types/data_traits.hpp" namespace sparrow @@ -28,14 +28,14 @@ namespace sparrow using const_reference = array_traits::const_reference; using size_type = std::size_t; - list_value(const array_base* flat_array, size_type index_begin, size_type index_end); + list_value(const array_wrapper* flat_array, size_type index_begin, size_type index_end); size_type size() const; const_reference operator[](size_type i) const; private: - const array_base* p_flat_array; + const array_wrapper* p_flat_array; size_type m_index_begin; size_type m_index_end; }; diff --git a/include/sparrow/layout/null_array.hpp b/include/sparrow/layout/null_array.hpp index 282b3725..f00c2d5b 100644 --- a/include/sparrow/layout/null_array.hpp +++ b/include/sparrow/layout/null_array.hpp @@ -58,7 +58,7 @@ namespace sparrow friend class iterator_access; }; - class null_array final : public array_base + class null_array { public: @@ -70,7 +70,7 @@ namespace sparrow using const_reference = const_iterator::reference; using size_type = std::size_t; using difference_type = iterator::difference_type; - using iterator_tag = std::contiguous_iterator_tag; + using iterator_tag = std::random_access_iterator_tag; using const_value_iterator = empty_iterator; using const_bitmap_iterator = empty_iterator; @@ -79,7 +79,6 @@ namespace sparrow using const_bitmap_range = std::ranges::subrange; explicit null_array(arrow_proxy); - virtual ~null_array() = default; size_type size() const; @@ -102,12 +101,11 @@ namespace sparrow difference_type ssize() const; - null_array(const null_array&) = default; - null_array* clone_impl() const override; - arrow_proxy m_proxy; }; + bool operator==(const null_array& lhs, const null_array& rhs); + /********************************* * empty_iterator implementation * *********************************/ @@ -165,8 +163,7 @@ namespace sparrow *****************************/ inline 
null_array::null_array(arrow_proxy proxy) - : array_base(proxy.data_type()) - , m_proxy(std::move(proxy)) + : m_proxy(std::move(proxy)) { SPARROW_ASSERT_TRUE(m_proxy.data_type() == data_type::NA); } @@ -233,9 +230,9 @@ namespace sparrow return static_cast(size()); } - inline null_array* null_array::clone_impl() const + inline bool operator==(const null_array& lhs, const null_array& rhs) { - return new null_array(*this); + return lhs.size() == rhs.size(); } } diff --git a/include/sparrow/layout/primitive_array.hpp b/include/sparrow/layout/primitive_array.hpp index 79b2d69b..c3fe5bdb 100644 --- a/include/sparrow/layout/primitive_array.hpp +++ b/include/sparrow/layout/primitive_array.hpp @@ -39,12 +39,11 @@ namespace sparrow using value_iterator = pointer_iterator; using const_value_iterator = pointer_iterator; - using iterator_tag = std::contiguous_iterator_tag; + using iterator_tag = std::random_access_iterator_tag; }; template - class primitive_array final : public array_base, - public array_crtp_base> + class primitive_array final : public array_crtp_base> { public: @@ -64,7 +63,7 @@ namespace sparrow using const_pointer = typename inner_types::const_pointer; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; - using iterator_tag = std::contiguous_iterator_tag; + using iterator_tag = typename base_type::iterator_tag; using value_iterator = typename base_type::value_iterator; using bitmap_range = typename base_type::bitmap_range; @@ -72,11 +71,11 @@ namespace sparrow using const_bitmap_range = typename base_type::const_bitmap_range; explicit primitive_array(arrow_proxy); - ~primitive_array() override = default; using base_type::size; private: + bitmap_type::iterator bitmap_begin_impl(); bitmap_type::const_iterator bitmap_begin_impl() const; @@ -97,9 +96,6 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; - primitive_array(const primitive_array&) = 
default; - primitive_array* clone_impl() const override; - static constexpr size_type DATA_BUFFER_INDEX = 1; bitmap_type m_bitmap; @@ -136,8 +132,7 @@ namespace sparrow template primitive_array::primitive_array(arrow_proxy proxy) - : array_base(proxy.data_type()) - , base_type(std::move(proxy)) + : base_type(std::move(proxy)) , m_bitmap(make_simple_bitmap(storage())) { SPARROW_ASSERT_TRUE(detail::check_primitive_data_type(storage().data_type())); @@ -195,12 +190,6 @@ namespace sparrow return sparrow::next(value_cbegin(), size()); } - template - primitive_array* primitive_array::clone_impl() const - { - return new primitive_array(*this); - } - template auto primitive_array::bitmap_begin_impl() -> bitmap_type::iterator { diff --git a/include/sparrow/layout/struct_layout/struct_array.hpp b/include/sparrow/layout/struct_layout/struct_array.hpp index c8e7e3f9..afe23890 100644 --- a/include/sparrow/layout/struct_layout/struct_array.hpp +++ b/include/sparrow/layout/struct_layout/struct_array.hpp @@ -16,6 +16,7 @@ #include "sparrow/array_factory.hpp" #include "sparrow/layout/array_base.hpp" +#include "sparrow/layout/array_wrapper.hpp" #include "sparrow/layout/layout_utils.hpp" #include "sparrow/layout/nested_value_types.hpp" #include "sparrow/utils/functor_index_iterator.hpp" @@ -40,8 +41,7 @@ namespace sparrow using iterator_tag = std::random_access_iterator_tag; }; - class struct_array final : public array_base, - public array_crtp_base + class struct_array final : public array_crtp_base { public: @@ -66,15 +66,12 @@ namespace sparrow using value_type = nullable; using reference = nullable; using const_reference = nullable; - using iterator_tag = std::contiguous_iterator_tag; - + using iterator_tag = base_type::iterator_tag; explicit struct_array(arrow_proxy proxy); - virtual ~struct_array() = default; - struct_array(const struct_array& rhs) = default; - struct_array* clone_impl() const override; - const array_base* raw_child(std::size_t i) const; - array_base* 
raw_child(std::size_t i); + + const array_wrapper* raw_child(std::size_t i) const; + array_wrapper* raw_child(std::size_t i); private: @@ -89,7 +86,7 @@ namespace sparrow bitmap_type::const_iterator bitmap_begin_impl() const; // data members - std::vector> m_children; + std::vector> m_children; bitmap_type m_bitmap; // friend classes @@ -101,8 +98,7 @@ namespace sparrow }; inline struct_array::struct_array(arrow_proxy proxy) - : array_base(proxy.data_type()) - , base_type(std::move(proxy)) + : base_type(std::move(proxy)) , m_children(this->storage().children().size(), nullptr) , m_bitmap(make_simple_bitmap(storage())) { @@ -112,17 +108,12 @@ namespace sparrow } } - inline auto struct_array::clone_impl() const -> struct_array* - { - return new struct_array(*this); - } - - inline auto struct_array::raw_child(std::size_t i) const -> const array_base* + inline auto struct_array::raw_child(std::size_t i) const -> const array_wrapper* { return m_children[i].get(); } - inline auto struct_array::raw_child(std::size_t i) -> array_base* + inline auto struct_array::raw_child(std::size_t i) -> array_wrapper* { return m_children[i].get(); } diff --git a/include/sparrow/layout/struct_layout/struct_value.hpp b/include/sparrow/layout/struct_layout/struct_value.hpp index 17a79df5..8aae2e7b 100644 --- a/include/sparrow/layout/struct_layout/struct_value.hpp +++ b/include/sparrow/layout/struct_layout/struct_value.hpp @@ -15,7 +15,7 @@ #pragma once #include "sparrow/config/config.hpp" -#include "sparrow/layout/array_base.hpp" +#include "sparrow/layout/array_wrapper.hpp" #include "sparrow/types/data_traits.hpp" #include "sparrow/utils/memory.hpp" @@ -28,14 +28,15 @@ namespace sparrow using value_type = array_traits::value_type; using const_reference = array_traits::const_reference; using size_type = std::size_t; + using child_ptr = cloning_ptr; - struct_value(const std::vector>& children, size_type index); + struct_value(const std::vector& children, size_type index); size_type size() 
const; const_reference operator[](size_type i) const; private: - const std::vector>& m_children; + const std::vector& m_children; size_type m_index; }; } diff --git a/include/sparrow/layout/variable_size_binary_array.hpp b/include/sparrow/layout/variable_size_binary_array.hpp index 51c9cbb2..9c627aa3 100644 --- a/include/sparrow/layout/variable_size_binary_array.hpp +++ b/include/sparrow/layout/variable_size_binary_array.hpp @@ -179,8 +179,7 @@ namespace sparrow }; template - class variable_size_binary_array final : public array_base, - public array_crtp_base> + class variable_size_binary_array final : public array_crtp_base> { public: @@ -201,7 +200,7 @@ namespace sparrow using const_offset_iterator = typename inner_types::const_offset_iterator; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; - using iterator_tag = std::contiguous_iterator_tag; + using iterator_tag = typename base_type::iterator_tag; using data_iterator = typename inner_types::data_iterator; using const_data_iterator = typename inner_types::const_data_iterator; using data_value_type = typename inner_types::data_value_type; @@ -213,7 +212,6 @@ namespace sparrow using const_value_iterator = typename inner_types::const_value_iterator; explicit variable_size_binary_array(arrow_proxy); - ~variable_size_binary_array() override = default; using base_type::size; @@ -255,9 +253,6 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; - variable_size_binary_array(const variable_size_binary_array&) = default; - variable_size_binary_array* clone_impl() const override; - friend class array_crtp_base; friend class variable_size_binary_reference; friend class variable_size_binary_value_iterator; @@ -447,8 +442,7 @@ namespace sparrow template variable_size_binary_array::variable_size_binary_array(arrow_proxy proxy) - : array_base(proxy.data_type()) - , base_type(std::move(proxy)) + : 
base_type(std::move(proxy)) , m_bitmap(make_simple_bitmap(storage())) { const auto type = storage().data_type(); @@ -594,12 +588,6 @@ namespace sparrow return sparrow::next(value_cbegin(), size()); } - template - variable_size_binary_array* variable_size_binary_array::clone_impl() const - { - return new variable_size_binary_array(*this); - } - template auto variable_size_binary_array::bitmap_begin_impl() -> bitmap_type::iterator { diff --git a/include/sparrow/utils/memory.hpp b/include/sparrow/utils/memory.hpp index 181547af..8d2fd310 100644 --- a/include/sparrow/utils/memory.hpp +++ b/include/sparrow/utils/memory.hpp @@ -35,103 +35,51 @@ namespace sparrow template class value_ptr { + using internal_pointer = std::unique_ptr; public: - constexpr value_ptr() noexcept = default; - - constexpr value_ptr(std::nullptr_t) noexcept - { - } - - explicit value_ptr(T value) - : value_(std::make_unique(std::move(value))) - { - } - - explicit value_ptr(T* value) - : value_(value != nullptr ? std::make_unique(*value) : std::unique_ptr()) - { - } + using self_type = value_ptr; + using pointer = typename internal_pointer::pointer; + using element_type = typename internal_pointer::element_type; - value_ptr(const value_ptr& other) - : value_(other.value_ ? 
std::make_unique(*other.value_) : std::unique_ptr()) - { - } + // Value semantic - value_ptr(value_ptr&& other) noexcept = default; + constexpr value_ptr() noexcept = default; + constexpr value_ptr(std::nullptr_t) noexcept; + explicit value_ptr(T value); + explicit value_ptr(T* value); ~value_ptr() = default; - value_ptr& operator=(const value_ptr& other) - { - if (other.has_value()) - { - if (value_) - { - *value_ = *other.value_; - } - else - { - value_ = std::make_unique(*other.value_); - } - } - else - { - value_.reset(); - } - return *this; - } + value_ptr(const value_ptr& other); + value_ptr(value_ptr&& other) noexcept = default; + value_ptr& operator=(const value_ptr& other); value_ptr& operator=(value_ptr&& other) noexcept = default; - value_ptr& operator=(std::nullptr_t) noexcept - { - reset(); - return *this; - } + value_ptr& operator=(std::nullptr_t) noexcept; - T& operator*() - { - SPARROW_ASSERT_TRUE(value_); - return *value_; - } + // Modifiers - const T& operator*() const - { - SPARROW_ASSERT_TRUE(value_); - return *value_; - } + void reset() noexcept; - T* operator->() - { - SPARROW_ASSERT_TRUE(value_); - return &*value_; - } + // Observers - const T* operator->() const - { - SPARROW_ASSERT_TRUE(value_); - return &*value_; - } + T* get() noexcept; + const T* get() const noexcept; - explicit operator bool() const noexcept - { - return has_value(); - } + explicit operator bool() const noexcept; + bool has_value() const noexcept; - bool has_value() const noexcept - { - return bool(value_); - } + T& operator*(); + const T& operator*() const; - void reset() noexcept - { - value_.reset(); - } + T* operator->(); + const T* operator->() const; private: - std::unique_ptr value_; + internal_pointer value_; }; /** @@ -257,6 +205,119 @@ namespace sparrow template cloning_ptr make_cloning_ptr(Args&&... 
args); + /**************************** + * value_ptr implementation * + ****************************/ + + template + constexpr value_ptr::value_ptr(std::nullptr_t) noexcept + { + } + + template + value_ptr::value_ptr(T value) + : value_(std::make_unique(std::move(value))) + { + } + + template + value_ptr::value_ptr(T* value) + : value_(value != nullptr ? std::make_unique(*value) : std::unique_ptr()) + { + } + + template + value_ptr::value_ptr(const value_ptr& other) + : value_(other.value_ ? std::make_unique(*other.value_) : std::unique_ptr()) + { + } + + template + value_ptr& value_ptr::operator=(const value_ptr& other) + { + if (other.has_value()) + { + if (value_) + { + *value_ = *other.value_; + } + else + { + value_ = std::make_unique(*other.value_); + } + } + else + { + value_.reset(); + } + return *this; + } + + template + value_ptr& value_ptr::operator=(std::nullptr_t) noexcept + { + reset(); + return *this; + } + + template + void value_ptr::reset() noexcept + { + value_.reset(); + } + + template + T* value_ptr::get() noexcept + { + return value_.get(); + } + + template + const T* value_ptr::get() const noexcept + { + return value_.get(); + } + + template + value_ptr::operator bool() const noexcept + { + return has_value(); + } + + template + bool value_ptr::has_value() const noexcept + { + return bool(value_); + } + + template + T& value_ptr::operator*() + { + SPARROW_ASSERT_TRUE(value_); + return *value_; + } + + template + const T& value_ptr::operator*() const + { + SPARROW_ASSERT_TRUE(value_); + return *value_; + } + + template + T* value_ptr::operator->() + { + SPARROW_ASSERT_TRUE(value_); + return &*value_; + } + + template + const T* value_ptr::operator->() const + { + SPARROW_ASSERT_TRUE(value_); + return &*value_; + } + /****************************** * cloning_ptr implementation * ******************************/ diff --git a/include/sparrow/utils/nullable.hpp b/include/sparrow/utils/nullable.hpp index 4995cf0f..da58db28 100644 --- 
a/include/sparrow/utils/nullable.hpp +++ b/include/sparrow/utils/nullable.hpp @@ -514,7 +514,27 @@ namespace sparrow constexpr explicit operator bool() const; constexpr bool has_value() const; }; +} +namespace std +{ + namespace mpl = sparrow::mpl; + + // Specialization of basic_common_reference for nullable proxies so + // we can use ranges algorithm on iterators returning nullable + template class TQual, template class UQual> + struct basic_common_reference, sparrow::nullable, TQual, UQual> + { + using type = sparrow::nullable< + std::common_reference_t, UQual>, + std::common_reference_t, UQual> + >; + }; +} + +namespace sparrow +{ /*************************** * nullable implementation * ***************************/ diff --git a/src/array_factory.cpp b/src/array_factory.cpp index da1df7d0..45e8d519 100644 --- a/src/array_factory.cpp +++ b/src/array_factory.cpp @@ -14,8 +14,7 @@ #include -#include "sparrow/arrow_array_schema_proxy.hpp" -#include "sparrow/layout/array_base.hpp" +#include "sparrow/array_factory.hpp" #include "sparrow/layout/primitive_array.hpp" #include "sparrow/layout/list_layout/list_array.hpp" #include "sparrow/layout/struct_layout/struct_array.hpp" @@ -24,49 +23,58 @@ namespace sparrow { - cloning_ptr array_factory(arrow_proxy proxy) + namespace detail + { + template + cloning_ptr make_wrapper_ptr(arrow_proxy proxy) + { + return cloning_ptr{new array_wrapper_impl(T(std::move(proxy))) }; + } + } + + cloning_ptr array_factory(arrow_proxy proxy) { const auto dt = proxy.data_type(); switch(dt) { case data_type::NA: - return make_cloning_ptr(std::move(proxy)); + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::BOOL: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::INT8: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::UINT8: - return make_cloning_ptr>(std::move(proxy)); + return 
detail::make_wrapper_ptr>(std::move(proxy)); case data_type::INT16: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::UINT16: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::INT32: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::UINT32: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::INT64: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::UINT64: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::FLOAT: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::DOUBLE: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::LIST: - return make_cloning_ptr(std::move(proxy)); + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::LARGE_LIST: - return make_cloning_ptr(std::move(proxy)); + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::LIST_VIEW: - return make_cloning_ptr(std::move(proxy)); + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::LARGE_LIST_VIEW: - return make_cloning_ptr(std::move(proxy)); + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::FIXED_SIZED_LIST: - return make_cloning_ptr(std::move(proxy)); + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::STRUCT: - return make_cloning_ptr(std::move(proxy)); + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::STRING: - return make_cloning_ptr>(std::move(proxy)); + return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::FIXED_SIZE_BINARY: case data_type::TIMESTAMP: case data_type::MAP: 
diff --git a/src/list_value.cpp b/src/list_value.cpp index c8f4855c..938bf4ea 100644 --- a/src/list_value.cpp +++ b/src/list_value.cpp @@ -17,7 +17,7 @@ namespace sparrow { - list_value::list_value(const array_base* flat_array, size_type index_begin, size_type index_end) + list_value::list_value(const array_wrapper* flat_array, size_type index_begin, size_type index_end) : p_flat_array(flat_array) , m_index_begin(index_begin) , m_index_end(index_end) diff --git a/src/struct_value.cpp b/src/struct_value.cpp index a1863094..ee2c60c3 100644 --- a/src/struct_value.cpp +++ b/src/struct_value.cpp @@ -17,7 +17,7 @@ namespace sparrow { - struct_value::struct_value( const std::vector>& children, size_type index) + struct_value::struct_value( const std::vector& children, size_type index) : m_children(children) , m_index(index) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1fbd147c..8cb7a46c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -43,6 +43,7 @@ set(SPARROW_TESTS_SOURCES main.cpp test_algorithm.cpp test_allocator.cpp + test_array_wrapper.cpp test_arrow_array_schema_factory.cpp test_arrow_array_schema_proxy.cpp test_arrow_array_schema_utils.cpp diff --git a/test/test_array_wrapper.cpp b/test/test_array_wrapper.cpp new file mode 100644 index 00000000..d260caa2 --- /dev/null +++ b/test/test_array_wrapper.cpp @@ -0,0 +1,141 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "sparrow/layout/array_wrapper.hpp" +#include "sparrow/layout/null_array.hpp" +#include "sparrow/layout/primitive_array.hpp" + +#include "external_array_data_creation.hpp" + +#include "doctest/doctest.h" + +namespace sparrow +{ + using test::make_arrow_proxy; + using testing_types = std::tuple< + null_array, + primitive_array, + primitive_array, + primitive_array, + primitive_array, + primitive_array, + primitive_array, + primitive_array, + primitive_array, + primitive_array, + primitive_array, + primitive_array + >; + + TEST_SUITE("array_wrapper") + { + constexpr std::size_t size = 10u; + constexpr std::size_t offset = 0u; + + TEST_CASE_TEMPLATE_DEFINE("Constructor", AR, array_wrapper_ctor) + { + using array_type = AR; + using scalar_value_type = typename AR::inner_value_type; + using wrapper_type = array_wrapper_impl; + + SUBCASE("from rvalue") + { + array_type ar(make_arrow_proxy(size, offset)); + array_type expected(ar); + wrapper_type w(std::move(ar)); + CHECK_EQ(w.get_wrapped(), expected); + } + + SUBCASE("from pointer") + { + array_type ar(make_arrow_proxy(size, offset)); + wrapper_type w(&ar); + CHECK_EQ(w.get_wrapped(), ar); + } + + SUBCASE("from shared_ptr") + { + auto ptr = std::make_shared(make_arrow_proxy(size, offset)); + wrapper_type w(ptr); + CHECK_EQ(w.get_wrapped(), *ptr); + } + } + TEST_CASE_TEMPLATE_APPLY(array_wrapper_ctor, testing_types); + + TEST_CASE_TEMPLATE_DEFINE("unwrap_array", AR, array_wrapper_unwrap) + { + using array_type = AR; + using scalar_value_type = typename AR::inner_value_type; + using wrapper_type = array_wrapper_impl; + + SUBCASE("from rvalue") + { + array_type ar(make_arrow_proxy(size, offset)); + wrapper_type w(std::move(ar)); + CHECK_EQ(unwrap_array(w), w.get_wrapped()); + } + + SUBCASE("from pointer") + { + array_type ar(make_arrow_proxy(size, offset)); + wrapper_type w(&ar); + CHECK_EQ(unwrap_array(w), w.get_wrapped()); + } + + SUBCASE("from shared_ptr") + { + auto ptr = 
std::make_shared(make_arrow_proxy(size, offset)); + wrapper_type w(ptr); + CHECK_EQ(unwrap_array(w), w.get_wrapped()); + } + } + TEST_CASE_TEMPLATE_APPLY(array_wrapper_unwrap, testing_types); + + TEST_CASE_TEMPLATE_DEFINE("clone", AR, array_wrapper_clone) + { + using array_type = AR; + using scalar_value_type = typename AR::inner_value_type; + using wrapper_type = array_wrapper_impl; + + SUBCASE("from rvalue") + { + array_type ar(make_arrow_proxy(size, offset)); + wrapper_type w(std::move(ar)); + auto cl = w.clone(); + + CHECK_EQ(unwrap_array(*cl), w.get_wrapped()); + CHECK_NE(&(unwrap_array(*cl)), &(w.get_wrapped())); + } + + SUBCASE("from pointer") + { + array_type ar(make_arrow_proxy(size, offset)); + wrapper_type w(&ar); + auto cl = w.clone(); + CHECK_EQ(unwrap_array(*cl), ar); + CHECK_EQ(&(unwrap_array(*cl)), &ar); + } + + SUBCASE("from shared_ptr") + { + auto ptr = std::make_shared(make_arrow_proxy(size, offset)); + wrapper_type w(ptr); + auto cl = w.clone(); + CHECK_EQ(unwrap_array(*cl), *ptr); + CHECK_EQ(&(unwrap_array(*cl)), ptr.get()); + } + } + TEST_CASE_TEMPLATE_APPLY(array_wrapper_clone, testing_types); + } +} diff --git a/test/test_arrow_array.cpp b/test/test_arrow_array.cpp index 20e18359..8a7401d6 100644 --- a/test/test_arrow_array.cpp +++ b/test/test_arrow_array.cpp @@ -22,8 +22,6 @@ #include "arrow_array_schema_creation.hpp" #include "doctest/doctest.h" - - using buffer_type = sparrow::buffer; const buffer_type buffer_dummy({0, 1, 2, 3, 4}); using buffers_type = std::vector; diff --git a/test/test_dispatch.cpp b/test/test_dispatch.cpp index acc09b5a..028d2bf4 100644 --- a/test/test_dispatch.cpp +++ b/test/test_dispatch.cpp @@ -42,10 +42,10 @@ namespace sparrow TEST_CASE_TEMPLATE_DEFINE("array_size", AR, array_size_id) { using array_type = AR; + using wrapper_type = array_wrapper_impl; array_type ar(make_arrow_proxy()); - - const array_base& ar_base = ar; - auto size = array_size(ar_base); + wrapper_type w(&ar); + auto size = array_size(w); 
CHECK_EQ(size, ar.size()); } @@ -54,12 +54,13 @@ namespace sparrow TEST_CASE_TEMPLATE_DEFINE("array_element", AR, array_element_id) { using array_type = AR; + using wrapper_type = array_wrapper_impl; array_type ar(make_arrow_proxy()); + wrapper_type w(&ar); - const array_base& ar_base = ar; for (std::size_t i = 0; i < ar.size(); ++i) { - auto elem = array_element(ar_base, i); + auto elem = array_element(w, i); CHECK_EQ(elem.has_value(), ar[i].has_value()); if (elem.has_value()) { diff --git a/test/test_list_array.cpp b/test/test_list_array.cpp index ca482d6c..8cde65a2 100644 --- a/test/test_list_array.cpp +++ b/test/test_list_array.cpp @@ -26,9 +26,6 @@ namespace sparrow { TEST_CASE_TEMPLATE("list[T]",T, std::uint8_t, std::int32_t, float, double) { - - - using inner_scalar_type = T; using inner_nullable_type = nullable; @@ -88,39 +85,36 @@ namespace sparrow SUBCASE("cast flat array") { // get the flat values (offset is not applied) - array_base * flat_values = list_arr.raw_flat_array(); + array_wrapper* flat_values = list_arr.raw_flat_array(); // cast into a primitive array - primitive_array * flat_values_casted = static_cast *>(flat_values); + auto& flat_values_casted = unwrap_array>(*flat_values); + using primitive_size_type = typename primitive_array::size_type; // check the size - REQUIRE(flat_values_casted->size() == n_flat); + REQUIRE(flat_values_casted.size() == n_flat); // check that flat values are "iota" if constexpr(std::is_integral_v) { for(inner_scalar_type i = 0; i < static_cast(n_flat); ++i){ - CHECK((*flat_values_casted)[static_cast(i)].value() == i); + CHECK(flat_values_casted[static_cast(i)].value() == i); } } else { for(inner_scalar_type i = 0; i < static_cast(n_flat); ++i){ - CHECK((*flat_values_casted)[static_cast(i)].value() == doctest::Approx(static_cast(i))); + CHECK(flat_values_casted[static_cast(i)].value() == doctest::Approx(static_cast(i))); } } } } } - TEST_SUITE("list_view_array") { TEST_CASE_TEMPLATE("list_view_array[T]",T, 
std::uint8_t, std::int32_t, float, double) { - - - using inner_scalar_type = T; using inner_nullable_type = nullable; @@ -140,7 +134,7 @@ namespace sparrow ArrowArray arr{}; ArrowSchema schema{}; test::fill_schema_and_array_for_list_view_layout(schema, arr, flat_schema, flat_arr, sizes, {}, 0); - arrow_proxy proxy(&arr, &schema); + arrow_proxy proxy(&arr, &schema); // create a list array list_view_array list_arr(std::move(proxy)); @@ -152,7 +146,8 @@ namespace sparrow REQUIRE(list_arr[i].has_value()); CHECK(list_arr[i].value().size() == sizes[i]); } - } + } + SUBCASE("element-values") { std::size_t flat_index = 0; @@ -180,26 +175,26 @@ namespace sparrow SUBCASE("cast flat array") { // get the flat values (offset is not applied) - array_base * flat_values = list_arr.raw_flat_array(); + array_wrapper* flat_values = list_arr.raw_flat_array(); // cast into a primitive array - primitive_array * flat_values_casted = static_cast *>(flat_values); + auto& flat_values_casted = unwrap_array>(*flat_values); using primitive_size_type = typename primitive_array::size_type; // check the size - REQUIRE(flat_values_casted->size() == n_flat); + REQUIRE(flat_values_casted.size() == n_flat); // check that flat values are "iota" if constexpr(std::is_integral_v) { for(inner_scalar_type i = 0; i < static_cast(n_flat); ++i){ - CHECK((*flat_values_casted)[static_cast(i)].value() == i); + CHECK(flat_values_casted[static_cast(i)].value() == i); } } else { for(inner_scalar_type i = 0; i < static_cast(n_flat); ++i){ - CHECK((*flat_values_casted)[static_cast(i)].value() == doctest::Approx(static_cast(i))); + CHECK(flat_values_casted[static_cast(i)].value() == doctest::Approx(static_cast(i))); } } } diff --git a/test/test_list_value.cpp b/test/test_list_value.cpp index 567bd672..ad015f1f 100644 --- a/test/test_list_value.cpp +++ b/test/test_list_value.cpp @@ -23,14 +23,17 @@ namespace sparrow { using scalar_value_type = std::int32_t; + using array_type = primitive_array; + using wrapper_type = 
array_wrapper_impl; using test::make_arrow_proxy; TEST_SUITE("value_list") { TEST_CASE("size") { - primitive_array ar(make_arrow_proxy()); - list_value l(&ar, 2u, 7u); + array_type ar(make_arrow_proxy()); + wrapper_type w(&ar); + list_value l(&w, 2u, 7u); CHECK_EQ(l.size(), 5u); } @@ -39,8 +42,10 @@ namespace sparrow { std::size_t begin = 2u; std::size_t end = 7u; - primitive_array ar(make_arrow_proxy()); - list_value l(&ar, begin, end); + array_type ar(make_arrow_proxy()); + wrapper_type w(&ar); + + list_value l(&w, begin, end); for (std::size_t i = begin; i < end; ++i) { CHECK_EQ(l[i].has_value(), ar[begin+i].has_value()); diff --git a/test/test_memory.cpp b/test/test_memory.cpp index 47f6a4ae..89ffa3f6 100644 --- a/test/test_memory.cpp +++ b/test/test_memory.cpp @@ -125,6 +125,12 @@ TEST_SUITE("value_ptr") CHECK_EQ(vp->at(0), 42); } + TEST_CASE("get") + { + value_ptr vp(std::vector{42}); + CHECK_EQ(vp.get(), std::addressof(*vp)); + } + TEST_CASE("operator bool") { value_ptr vp;