From 055998509cc1b8b231623a42d7920b4a92b3b3b5 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 22 Oct 2024 05:18:05 +0200 Subject: [PATCH] Fixed copy semantic of union layouts --- include/sparrow/layout/layout_utils.hpp | 56 +++++------ include/sparrow/layout/union_array.hpp | 124 ++++++++++++++++++++---- test/external_array_data_creation.cpp | 52 +++++----- test/external_array_data_creation.hpp | 8 +- test/test_union_array.cpp | 107 +++++++++++++------- 5 files changed, 229 insertions(+), 118 deletions(-) diff --git a/include/sparrow/layout/layout_utils.hpp b/include/sparrow/layout/layout_utils.hpp index 00ac6471..f97258a7 100644 --- a/include/sparrow/layout/layout_utils.hpp +++ b/include/sparrow/layout/layout_utils.hpp @@ -18,45 +18,31 @@ namespace sparrow::detail { - - template - class layout_functor_base - { - public: - using layout_type = LAYOUT_TYPE; - constexpr layout_functor_base() = default; - constexpr layout_functor_base& operator=(layout_functor_base&&) = default; - constexpr layout_functor_base(const layout_functor_base&) = default; - constexpr layout_functor_base(layout_functor_base&&) = default; - constexpr layout_functor_base& operator=(const layout_functor_base&) = default; - - constexpr layout_functor_base(layout_type * layout) - : p_layout(layout) - { - } - - protected: - layout_type * p_layout = nullptr; - }; - - // Functor to get the value of the layout at index i. // // This is usefull to create a iterator over the values of a layout. // This functor will be passed to the functor_index_iterator. template - class layout_value_functor : public layout_functor_base + class layout_value_functor { - public: - using base_type = layout_functor_base; - using base_type::base_type; - using base_type::operator=; + public: + using value_type = VALUE_TYPE; + using layout_type = LAYOUT_TYPE; + + constexpr explicit layout_value_functor(layout_type* layout = nullptr) + : p_layout(layout) + { + } value_type operator()(std::size_t i) const { return this->p_layout->value(i); } + + private: + + layout_type* p_layout; }; @@ -65,18 +51,26 @@ namespace sparrow::detail // This is usefull to create a iterator over the nullable-values of a layout. // This functor will be passed to the functor_index_iterator. template - class layout_bracket_functor : public layout_functor_base + class layout_bracket_functor { public: - using base_type = layout_functor_base; - using base_type::base_type; - using base_type::operator=; + using value_type = VALUE_TYPE; + using layout_type = LAYOUT_TYPE; + + constexpr explicit layout_bracket_functor(layout_type* layout = nullptr) + : p_layout(layout) + { + } value_type operator()(std::size_t i) const { return this->p_layout->operator[](i); } + + private: + + layout_type* p_layout; }; }; // namespace sparrow::detail diff --git a/include/sparrow/layout/union_array.hpp b/include/sparrow/layout/union_array.hpp index 76af9d3c..6c27e1ad 100644 --- a/include/sparrow/layout/union_array.hpp +++ b/include/sparrow/layout/union_array.hpp @@ -27,11 +27,9 @@ namespace sparrow { - class dense_union_array; class sparse_union_array; - namespace detail { template @@ -60,13 +58,16 @@ namespace sparrow class union_array_crtp_base : public crtp_base { public: + + using self_type = union_array_crtp_base; using derived_type = DERIVED; using inner_value_type = array_traits::inner_value_type; using value_type = array_traits::const_reference; - using iterator = functor_index_iterator>; - using const_iterator = functor_index_iterator>; + using functor_type = detail::layout_bracket_functor; + using const_functor_type = detail::layout_bracket_functor; + using iterator = functor_index_iterator; + using const_iterator = functor_index_iterator; - explicit union_array_crtp_base(arrow_proxy proxy); value_type operator[](std::size_t i) const; value_type operator[](std::size_t i); @@ -80,14 +81,22 @@ namespace sparrow const_iterator cend() const; protected: + using type_id_map = std::array; static type_id_map parse_type_id_map(std::string_view format_string); + using children_type = std::vector>; + children_type make_children(arrow_proxy& proxy); + + explicit union_array_crtp_base(arrow_proxy proxy); + union_array_crtp_base(const self_type& rhs); + self_type& operator=(const self_type& rhs); + arrow_proxy& get_arrow_proxy(); arrow_proxy m_proxy; const std::uint8_t * p_type_ids; - std::vector> m_children; + children_type m_children; // map from type-id to child-index std::array m_type_id_map; @@ -96,10 +105,19 @@ namespace sparrow friend class array_wrapper_impl; }; + template + bool operator==(const union_array_crtp_base& lhs, const union_array_crtp_base& rhs); + class dense_union_array : public union_array_crtp_base { public: + + using base_type = union_array_crtp_base; + explicit dense_union_array(arrow_proxy proxy); + dense_union_array(const dense_union_array& rhs); + dense_union_array& operator=(const dense_union_array& rhs); + private: std::size_t element_offset(std::size_t i) const; const std::int32_t * p_offsets; @@ -109,7 +127,11 @@ namespace sparrow class sparse_union_array : public union_array_crtp_base { public: - using union_array_crtp_base::union_array_crtp_base; + + using base_type = union_array_crtp_base; + + explicit sparse_union_array(arrow_proxy proxy); + private: std::size_t element_offset(std::size_t i) const; friend class union_array_crtp_base; @@ -132,6 +154,10 @@ namespace sparrow return ret; } + /**************************************** + * union_array_crtp_base implementation * + ****************************************/ + template arrow_proxy& union_array_crtp_base::get_arrow_proxy() { @@ -140,15 +166,30 @@ namespace sparrow template union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) - : m_proxy(std::move(proxy)), - p_type_ids(reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data())), - m_children(m_proxy.children().size(), nullptr), - m_type_id_map(parse_type_id_map(m_proxy.format())) + : m_proxy(std::move(proxy)) + , p_type_ids(reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data())) + , m_children(make_children(m_proxy)) + , m_type_id_map(parse_type_id_map(m_proxy.format())) + { + } + + template + union_array_crtp_base::union_array_crtp_base(const self_type& rhs) + : self_type(rhs.m_proxy) + { + } + + template + auto union_array_crtp_base::operator=(const self_type& rhs) -> self_type& { - for (std::size_t i = 0; i < m_children.size(); ++i) + if (this != &rhs) { - m_children[i] = array_factory(m_proxy.children()[i].view()); + m_proxy = rhs.m_proxy; + p_type_ids = reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data()); + m_children = make_children(m_proxy); + m_type_id_map = parse_type_id_map(m_proxy.format()); } + return *this; } template @@ -175,13 +216,13 @@ namespace sparrow template auto union_array_crtp_base::begin() -> iterator { - return iterator(detail::layout_bracket_functor{this}, 0); + return iterator(functor_type{&(this->derived_cast())}, 0); } template auto union_array_crtp_base::end() -> iterator { - return iterator(detail::layout_bracket_functor{this}, this->size()); + return iterator(functor_type{&(this->derived_cast())}, this->size()); } template @@ -199,25 +240,61 @@ namespace sparrow template auto union_array_crtp_base::cbegin() const -> const_iterator { - return const_iterator(detail::layout_bracket_functor{this}, 0); + return const_iterator(const_functor_type{&(this->derived_cast())}, 0); } template auto union_array_crtp_base::cend() const -> const_iterator { - return const_iterator(detail::layout_bracket_functor{this}, this->size()); + return const_iterator(const_functor_type{&(this->derived_cast())}, this->size()); } + template + auto union_array_crtp_base::make_children(arrow_proxy& proxy) -> children_type + { + children_type children(proxy.children().size(), nullptr); + for (std::size_t i = 0; i < children.size(); ++i) + { + children[i] = array_factory(proxy.children()[i].view()); + } + return children; + } + + template + bool operator==(const union_array_crtp_base& lhs, const union_array_crtp_base& rhs) + { + return std::ranges::equal(lhs, rhs); + } + + /************************************ + * dense_union_array implementation * + ************************************/ + #ifdef __GNUC__ # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wcast-align" #endif inline dense_union_array::dense_union_array(arrow_proxy proxy) - : union_array_crtp_base(std::move(proxy)), - p_offsets(reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data())) + : base_type(std::move(proxy)) + , p_offsets(reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data())) + { + } + + inline dense_union_array::dense_union_array(const dense_union_array& rhs) + : dense_union_array(rhs.m_proxy) { } + inline dense_union_array& dense_union_array::operator=(const dense_union_array& rhs) + { + if (this !=&rhs) + { + base_type::operator=(rhs); + p_offsets = reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data()); + } + return *this; + } + #ifdef __GNUC__ # pragma GCC diagnostic pop #endif @@ -227,6 +304,15 @@ namespace sparrow return static_cast(p_offsets[i]) + m_proxy.offset(); } + /************************************* + * sparse_union_array implementation * + *************************************/ + + inline sparse_union_array::sparse_union_array(arrow_proxy proxy) + : base_type(std::move(proxy)) + { + } + inline std::size_t sparse_union_array::element_offset(std::size_t i) const { return i + m_proxy.offset(); diff --git a/test/external_array_data_creation.cpp b/test/external_array_data_creation.cpp index 26a2ea92..838761cd 100644 --- a/test/external_array_data_creation.cpp +++ b/test/external_array_data_creation.cpp @@ -241,10 +241,6 @@ namespace sparrow::test std::make_move_iterator(children_schemas.end()), schema.children, [](auto&& child) { return new ArrowSchema(std::move(child)); }); - /*for (std::size_t i = 0; i < children_schemas.size(); ++i) - { - schema.children[i] = &children_schemas[i]; - }*/ schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -268,10 +264,6 @@ namespace sparrow::test std::make_move_iterator(children_arrays.end()), arr.children, [](auto&& child) { return new ArrowArray(std::move(child)); }); - /*for (std::size_t i = 0; i < children_arrays.size(); ++i) - { - arr.children[i] = &children_arrays[i]; - }*/ arr.dictionary = nullptr; arr.release = &release_arrow_array; @@ -321,10 +313,10 @@ namespace sparrow::test void fill_schema_and_array_for_sparse_union( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, - const std::vector & type_ids, - const std::string & format + std::vector&& children_schemas, + std::vector&& children_arrays, + const std::vector& type_ids, + const std::string& format ){ schema.format = format.c_str(); schema.name = "test"; @@ -332,10 +324,10 @@ namespace sparrow::test schema.n_children = static_cast(children_schemas.size()); schema.children = new ArrowSchema*[children_schemas.size()]; - for (std::size_t i = 0; i < children_schemas.size(); ++i) - { - schema.children[i] = &children_schemas[i]; - } + std::transform(std::make_move_iterator(children_schemas.begin()), + std::make_move_iterator(children_schemas.end()), + schema.children, + [](auto&& child) { return new ArrowSchema(std::move(child)); }); schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -355,10 +347,10 @@ namespace sparrow::test arr.buffers = const_cast(reinterpret_cast(buf)); arr.children = new ArrowArray*[children_arrays.size()]; - for (std::size_t i = 0; i < children_arrays.size(); ++i) - { - arr.children[i] = &children_arrays[i]; - } + std::transform(std::make_move_iterator(children_arrays.begin()), + std::make_move_iterator(children_arrays.end()), + arr.children, + [](auto&& child) { return new ArrowArray(std::move(child)); }); arr.dictionary = nullptr; arr.release = &release_arrow_array; @@ -367,8 +359,8 @@ namespace sparrow::test void fill_schema_and_array_for_dense_union( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, + std::vector&& children_schemas, + std::vector&& children_arrays, const std::vector & type_ids, const std::vector & offsets, const std::string & format @@ -379,10 +371,10 @@ namespace sparrow::test schema.n_children = static_cast(children_schemas.size()); schema.children = new ArrowSchema*[children_schemas.size()]; - for (std::size_t i = 0; i < children_schemas.size(); ++i) - { - schema.children[i] = &children_schemas[i]; - } + std::transform(std::make_move_iterator(children_schemas.begin()), + std::make_move_iterator(children_schemas.end()), + schema.children, + [](auto&& child) { return new ArrowSchema(std::move(child)); }); schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -407,10 +399,10 @@ namespace sparrow::test arr.buffers = const_cast(reinterpret_cast(buf)); arr.children = new ArrowArray*[children_arrays.size()]; - for (std::size_t i = 0; i < children_arrays.size(); ++i) - { - arr.children[i] = &children_arrays[i]; - } + std::transform(std::make_move_iterator(children_arrays.begin()), + std::make_move_iterator(children_arrays.end()), + arr.children, + [](auto&& child) { return new ArrowArray(std::move(child)); }); arr.dictionary = nullptr; arr.release = &release_arrow_array; diff --git a/test/external_array_data_creation.hpp b/test/external_array_data_creation.hpp index e22ab827..3529d749 100644 --- a/test/external_array_data_creation.hpp +++ b/test/external_array_data_creation.hpp @@ -293,8 +293,8 @@ namespace sparrow::test void fill_schema_and_array_for_sparse_union( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, + std::vector&& children_schemas, + std::vector&& children_arrays, const std::vector & type_ids, const std::string & format ); @@ -302,8 +302,8 @@ namespace sparrow::test void fill_schema_and_array_for_dense_union( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, + std::vector&& children_schemas, + std::vector&& children_arrays, const std::vector & type_ids, const std::vector & offsets, const std::string & format diff --git a/test/test_union_array.cpp b/test/test_union_array.cpp index ed62d15a..e599d94f 100644 --- a/test/test_union_array.cpp +++ b/test/test_union_array.cpp @@ -25,15 +25,10 @@ namespace sparrow { - TEST_SUITE("union") - { - - TEST_CASE("sparse_union") + namespace test + { + arrow_proxy make_sparse_union_proxy(const std::string& format_string, std::size_t n, bool altered = false) { - - const std::string format_string = "+us:3,4"; - const std::size_t n = 4; - std::vector children_arrays(2); std::vector children_schemas(2); @@ -43,22 +38,76 @@ namespace sparrow test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0/*offset*/, {}); children_schemas[1].name = "item 1"; - ArrowArray arr{}; ArrowSchema schema{}; std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; + if (altered) + { + type_ids[0] = std::uint8_t(4); + } test::fill_schema_and_array_for_sparse_union( - schema, arr, children_schemas, children_arrays, type_ids, format_string + schema, arr, std::move(children_schemas), std::move(children_arrays), type_ids, format_string ); - arrow_proxy proxy(&arr, &schema); + return arrow_proxy(std::move(arr), std::move(schema)); + } + + arrow_proxy make_dense_union_proxy(const std::string& format_string, std::size_t n_c, bool altered = false) + { + std::vector children_arrays(2); + std::vector children_schemas(2); + + test::fill_schema_and_array(children_schemas[0], children_arrays[0], n_c, 0/*offset*/, {}); + children_schemas[0].name = "item 0"; + + test::fill_schema_and_array(children_schemas[1], children_arrays[1], n_c, 0/*offset*/, {}); + children_schemas[1].name = "item 1"; + + ArrowArray arr{}; + ArrowSchema schema{}; + + std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; + if (altered) + { + type_ids[0] = std::uint8_t(4); + } + std::vector offsets = {0,0,1,1}; + + test::fill_schema_and_array_for_dense_union( + schema, arr, std::move(children_schemas), std::move(children_arrays), type_ids, offsets, format_string + ); + + return arrow_proxy(std::move(arr), std::move(schema)); + } + } + + TEST_SUITE("union") + { + TEST_CASE("sparse_union") + { + + const std::string format_string = "+us:3,4"; + const std::size_t n = 4; + + auto proxy = test::make_sparse_union_proxy(format_string, n); sparse_union_array uarr(std::move(proxy)); REQUIRE(uarr.size() == n); + SUBCASE("copy") + { + sparse_union_array uarr2(uarr); + CHECK_EQ(uarr2, uarr); + + sparse_union_array uarr3(test::make_sparse_union_proxy(format_string, n, true)); + CHECK_NE(uarr3, uarr); + uarr3 = uarr; + CHECK_EQ(uarr3, uarr); + } + SUBCASE("operator[]") { for (std::size_t i = 0; i < n; ++i) @@ -125,39 +174,29 @@ namespace sparrow } } + TEST_CASE("dense_union") { - const std::string format_string = "+ud:3,4"; const std::size_t n_c = 2; const std::size_t n = 4; - std::vector children_arrays(2); - std::vector children_schemas(2); - - test::fill_schema_and_array(children_schemas[0], children_arrays[0], n_c, 0/*offset*/, {}); - children_schemas[0].name = "item 0"; - - test::fill_schema_and_array(children_schemas[1], children_arrays[1], n_c, 0/*offset*/, {}); - children_schemas[1].name = "item 1"; - - - ArrowArray arr{}; - ArrowSchema schema{}; - - std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; - std::vector offsets = {0,0,1,1}; - - test::fill_schema_and_array_for_dense_union( - schema, arr, children_schemas, children_arrays, type_ids, offsets, format_string - ); - - arrow_proxy proxy(&arr, &schema); - + auto proxy = test::make_dense_union_proxy(format_string, n_c); dense_union_array uarr(std::move(proxy)); REQUIRE(uarr.size() == n); + SUBCASE("copy") + { + dense_union_array uarr2(uarr); + CHECK_EQ(uarr2, uarr); + + dense_union_array uarr3(test::make_dense_union_proxy(format_string, n_c, true)); + CHECK_NE(uarr3, uarr); + uarr3 = uarr; + CHECK_EQ(uarr3, uarr); + } + SUBCASE("operator[]") { for (std::size_t i = 0; i < n; ++i)