diff --git a/include/sparrow/layout/array_base.hpp b/include/sparrow/layout/array_base.hpp index 5e87dec1..0477d0e0 100644 --- a/include/sparrow/layout/array_base.hpp +++ b/include/sparrow/layout/array_base.hpp @@ -14,6 +14,7 @@ #pragma once +#include #include #include "sparrow/arrow_array_schema_proxy.hpp" @@ -24,6 +25,17 @@ namespace sparrow { + /** + * Make a simple bitmap from an arrow proxy. + */ + inline dynamic_bitset_view make_simple_bitmap(arrow_proxy& arrow_proxy) + { + constexpr size_t bitmap_buffer_index = 0; + SPARROW_ASSERT_TRUE(arrow_proxy.buffers().size() > bitmap_buffer_index); + const auto bitmap_size = arrow_proxy.length() + arrow_proxy.offset(); + return {arrow_proxy.buffers()[bitmap_buffer_index].data(), bitmap_size}; + } + /** * Base class for array type erasure */ @@ -278,20 +290,20 @@ namespace sparrow auto array_crtp_base::has_value(size_type i) -> bitmap_reference { SPARROW_ASSERT_TRUE(i < size()); - return derived_cast().get_bitmap()[static_cast(i)]; + return *sparrow::next(bitmap_begin(), i); } template auto array_crtp_base::has_value(size_type i) const -> bitmap_const_reference { SPARROW_ASSERT_TRUE(i < size()); - return derived_cast().get_bitmap()[static_cast(i)]; + return *sparrow::next(bitmap_begin(), i); } template auto array_crtp_base::bitmap_begin() -> bitmap_iterator { - return derived_cast().get_bitmap().begin(); + return derived_cast().bitmap_begin_impl(); } template @@ -303,7 +315,7 @@ namespace sparrow template auto array_crtp_base::bitmap_begin() const -> const_bitmap_iterator { - return derived_cast().get_bitmap().begin(); + return derived_cast().bitmap_begin_impl(); } template diff --git a/include/sparrow/layout/dictionary_encoded_array.hpp b/include/sparrow/layout/dictionary_encoded_array.hpp index 77ec3883..c3722db7 100644 --- a/include/sparrow/layout/dictionary_encoded_array.hpp +++ b/include/sparrow/layout/dictionary_encoded_array.hpp @@ -16,13 +16,12 @@ #include "sparrow/arrow_array_schema_proxy.hpp" #include "sparrow/layout/array_base.hpp" -#include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap.hpp" +#include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp" #include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_iterator.hpp" #include "sparrow/layout/primitive_array.hpp" #include "sparrow/types/data_type.hpp" #include "sparrow/utils/contracts.hpp" - namespace sparrow { /* @@ -44,6 +43,15 @@ namespace sparrow static constexpr bool is_const = IC; }; + template + struct dictionary_bitmap_types + { + using size_type = size_t; + using reference = bool; + using const_reference = bool; + using iterator = validity_iterator; + using const_iterator = validity_iterator; + }; template class dictionary_encoded_array; @@ -68,7 +76,7 @@ namespace sparrow using iterator = layout_iterator; using const_iterator = layout_iterator; - using bitmap_type = dictionary_bitmap; + using bitmap_type = dictionary_bitmap_types; }; template @@ -129,15 +137,13 @@ namespace sparrow keys_layout m_keys_layout; values_layout m_values_layout; - bitmap_type m_bitmap; - bitmap_type& get_bitmap(); - const bitmap_type& get_bitmap() const; + bitmap_type::iterator bitmap_begin_impl(); + bitmap_type::const_iterator bitmap_begin_impl() const; static const const_reference& dummy_const_reference(); static keys_layout create_keys_layout(arrow_proxy& proxy); static values_layout create_values_layout(arrow_proxy& proxy); - bitmap_type make_bitmap(keys_layout& keys, values_layout& values); friend class array_crtp_base; friend class dictionary_iterator>; @@ -154,7 +160,6 @@ namespace sparrow , base_type(std::move(proxy)) , m_keys_layout(create_keys_layout(storage())) , m_values_layout(create_values_layout(storage())) - , m_bitmap(make_bitmap(m_keys_layout, m_values_layout)) { SPARROW_ASSERT_TRUE(data_type_is_integer(storage().data_type())); } @@ -239,18 +244,6 @@ namespace sparrow return values_layout{std::move(ar_dictionary)}; } - template - auto dictionary_encoded_array::get_bitmap() -> bitmap_type& - { - return m_bitmap; - } - - template - auto dictionary_encoded_array::get_bitmap() const -> const bitmap_type& - { - return m_bitmap; - } - template auto dictionary_encoded_array::dummy_const_reference() -> const const_reference& { @@ -266,9 +259,14 @@ namespace sparrow } template - auto - dictionary_encoded_array::make_bitmap(keys_layout& keys, values_layout& values) -> bitmap_type + auto dictionary_encoded_array::bitmap_begin_impl() -> bitmap_type::iterator + { + return {m_keys_layout, m_values_layout.bitmap(), 0}; + } + + template + auto dictionary_encoded_array::bitmap_begin_impl() const -> bitmap_type::const_iterator { - return bitmap_type{keys, values.bitmap()}; + return {m_keys_layout, m_values_layout.bitmap(), 0}; } } diff --git a/include/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap.hpp b/include/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap.hpp deleted file mode 100644 index f828caed..00000000 --- a/include/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap.hpp +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright 2024 Man Group Operations Limited -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or mplied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp" - -#pragma once - -namespace sparrow -{ - - template - class dictionary_bitmap - { - public: - - using size_type = size_t; - using reference = bool; - using const_reference = bool; - using iterator = validity_iterator; - using const_iterator = validity_iterator; - - explicit dictionary_bitmap(KeysArray& keys, ValuesArrayBitmapRange values_bitmap_range); - - [[nodiscard]] size_type size() const noexcept; - [[nodiscard]] bool empty() const noexcept; - [[nodiscard]] size_type null_count() const noexcept; - - [[nodiscard]] bool test(size_type pos) const; - - [[nodiscard]] const_reference at(size_type pos) const; - - [[nodiscard]] const_reference operator[](size_type i) const; - - [[nodiscard]] const_iterator begin() const; - [[nodiscard]] const_iterator end() const; - [[nodiscard]] const_iterator cbegin() const; - [[nodiscard]] const_iterator cend() const; - - [[nodiscard]] const_reference front() const; - [[nodiscard]] const_reference back() const; - - private: - - [[nodiscard]] size_t calculate_null_count() const; - - KeysArray* m_keys; - ValuesArrayBitmapRange m_values_bitmap_range; - size_t m_null_count = 0; - }; - - /************************************ - * dictionary_bitmap implementation * - ************************************/ - - template - size_t dictionary_bitmap::calculate_null_count() const - { - return static_cast(std::count_if( - m_keys->begin(), - m_keys->end(), - [this](const auto& index) - { - return !index.has_value() || !m_values_bitmap_range[index.value()]; - } - )); - } - - template - dictionary_bitmap::dictionary_bitmap( - KeysArray& keys, - ValuesArrayBitmapRange values_array_bitmap_range - ) - : m_keys(&keys) - , m_values_bitmap_range(values_array_bitmap_range) - , m_null_count(calculate_null_count()) - { - } - - template - auto dictionary_bitmap::size() const noexcept -> size_type - { - return m_keys->size(); - } - - template - auto dictionary_bitmap::empty() const noexcept -> bool - { - return m_keys->empty(); - } - - template - auto dictionary_bitmap::null_count() const noexcept -> size_type - { - return m_null_count; - } - - template - auto dictionary_bitmap::test(size_type pos) const -> bool - { - const auto index = (*m_keys)[pos]; - return index.has_value() && m_values_bitmap_range[index.value()]; - } - - template - auto dictionary_bitmap::at(size_type pos) const -> const_reference - { - return test(pos); - } - - template - auto dictionary_bitmap::operator[](size_type i) const -> const_reference - { - return test(i); - } - - template - auto dictionary_bitmap::begin() const -> const_iterator - { - return const_iterator(*m_keys, m_values_bitmap_range, 0); - } - - template - auto dictionary_bitmap::end() const -> const_iterator - { - return const_iterator(*m_keys, m_values_bitmap_range, m_keys->size()); - } - - template - auto dictionary_bitmap::cbegin() const -> const_iterator - { - return begin(); - } - - template - auto dictionary_bitmap::cend() const -> const_iterator - { - return end(); - } - - template - auto dictionary_bitmap::front() const -> const_reference - { - return test(0); - } - - template - auto dictionary_bitmap::back() const -> const_reference - { - return test(m_keys->size() - 1); - } -} diff --git a/include/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp b/include/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp index 0dfe5f1f..48b09c2b 100644 --- a/include/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp +++ b/include/sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp @@ -55,7 +55,7 @@ namespace sparrow constexpr bool less_than(const self_type& rhs) const noexcept; const index_array_type* m_keys_array; - const value_array_bitmap_range_type* m_value_array_bitmap; + value_array_bitmap_range_type m_value_array_bitmap; size_type m_index; friend class iterator_access; @@ -72,7 +72,7 @@ namespace sparrow size_type index ) : m_keys_array(&index_array) - , m_value_array_bitmap(&value_bitmap) + , m_value_array_bitmap(value_bitmap) , m_index(index) { SPARROW_ASSERT_TRUE(m_index < m_keys_array->size()); @@ -88,7 +88,7 @@ namespace sparrow return false; } const auto key_value = key.value(); - const bool value_validity = (*m_value_array_bitmap)[key_value]; + const bool value_validity = m_value_array_bitmap[key_value]; return value_validity; } diff --git a/include/sparrow/layout/list_layout/list_array.hpp b/include/sparrow/layout/list_layout/list_array.hpp index 0bc8729d..6803287f 100644 --- a/include/sparrow/layout/list_layout/list_array.hpp +++ b/include/sparrow/layout/list_layout/list_array.hpp @@ -21,8 +21,9 @@ #include "sparrow/types/data_traits.hpp" #include "sparrow/utils/functor_index_iterator.hpp" #include "sparrow/utils/iterator.hpp" -#include "sparrow/utils/nullable.hpp" #include "sparrow/utils/memory.hpp" +#include "sparrow/utils/nullable.hpp" + namespace sparrow { @@ -99,8 +100,8 @@ namespace sparrow inner_reference value(size_type i); inner_const_reference value(size_type i) const; - bitmap_range get_bitmap(); - const_bitmap_range get_bitmap() const; + bitmap_type::iterator bitmap_begin_impl(); + bitmap_type::const_iterator bitmap_begin_impl() const; // data members flat_array_offset_type* p_list_offsets; @@ -188,14 +189,14 @@ namespace sparrow } template - auto list_array_impl::get_bitmap() -> bitmap_range + auto list_array_impl::bitmap_begin_impl() -> bitmap_type::iterator { - return bitmap_range(sparrow::next(m_bitmap.begin(), this->storage().offset()), m_bitmap.end()); + return next(m_bitmap.begin(), this->storage().offset()); } template - auto list_array_impl::get_bitmap() const -> const_bitmap_range + auto list_array_impl::bitmap_begin_impl() const -> bitmap_type::const_iterator { - return const_bitmap_range(sparrow::next(m_bitmap.cbegin(), this->storage().offset()), m_bitmap.cend()); + return next(m_bitmap.begin(), this->storage().offset()); } } diff --git a/include/sparrow/layout/primitive_array.hpp b/include/sparrow/layout/primitive_array.hpp index baff297f..79b2d69b 100644 --- a/include/sparrow/layout/primitive_array.hpp +++ b/include/sparrow/layout/primitive_array.hpp @@ -77,11 +77,8 @@ namespace sparrow using base_type::size; private: - - static bitmap_type make_bitmap(arrow_proxy& arrow_proxy); - - bitmap_range get_bitmap(); - const_bitmap_range get_bitmap() const; + bitmap_type::iterator bitmap_begin_impl(); + bitmap_type::const_iterator bitmap_begin_impl() const; using base_type::bitmap_begin; using base_type::bitmap_end; @@ -141,7 +138,7 @@ namespace sparrow primitive_array::primitive_array(arrow_proxy proxy) : array_base(proxy.data_type()) , base_type(std::move(proxy)) - , m_bitmap(make_bitmap(storage())) + , m_bitmap(make_simple_bitmap(storage())) { SPARROW_ASSERT_TRUE(detail::check_primitive_data_type(storage().data_type())); } @@ -205,23 +202,14 @@ namespace sparrow } template - auto primitive_array::make_bitmap(arrow_proxy& arrow_proxy) -> bitmap_type - { - constexpr size_t bitmap_buffer_index = 0; - SPARROW_ASSERT_TRUE(arrow_proxy.buffers().size() > bitmap_buffer_index); - const auto bitmap_size = arrow_proxy.length() + arrow_proxy.offset(); - return bitmap_type(arrow_proxy.buffers()[bitmap_buffer_index].data(), bitmap_size); - } - - template - auto primitive_array::get_bitmap() -> bitmap_range + auto primitive_array::bitmap_begin_impl() -> bitmap_type::iterator { - return bitmap_range(sparrow::next(m_bitmap.begin(), storage().offset()), m_bitmap.end()); + return next(m_bitmap.begin(), storage().offset()); } template - auto primitive_array::get_bitmap() const -> const_bitmap_range + auto primitive_array::bitmap_begin_impl() const -> bitmap_type::const_iterator { - return const_bitmap_range(sparrow::next(m_bitmap.cbegin(), storage().offset()), m_bitmap.cend()); + return next(m_bitmap.begin(), storage().offset()); } } diff --git a/include/sparrow/layout/struct_layout/struct_array.hpp b/include/sparrow/layout/struct_layout/struct_array.hpp index bfc2832f..c8e7e3f9 100644 --- a/include/sparrow/layout/struct_layout/struct_array.hpp +++ b/include/sparrow/layout/struct_layout/struct_array.hpp @@ -85,8 +85,8 @@ namespace sparrow inner_reference value(size_type i); inner_const_reference value(size_type i) const; - bitmap_range get_bitmap(); - const_bitmap_range get_bitmap() const; + bitmap_type::iterator bitmap_begin_impl(); + bitmap_type::const_iterator bitmap_begin_impl() const; // data members std::vector> m_children; @@ -104,10 +104,7 @@ namespace sparrow : array_base(proxy.data_type()) , base_type(std::move(proxy)) , m_children(this->storage().children().size(), nullptr) - , m_bitmap(bitmap_type{ - this->storage().buffers()[0].data(), - this->storage().length() + this->storage().offset() - }) + , m_bitmap(make_simple_bitmap(storage())) { for (std::size_t i = 0; i < m_children.size(); ++i) { @@ -163,13 +160,13 @@ namespace sparrow return struct_value{m_children, i}; } - auto struct_array::get_bitmap() -> bitmap_range + auto struct_array::bitmap_begin_impl() -> bitmap_type::iterator { - return bitmap_range(sparrow::next(m_bitmap.begin(), storage().offset()), m_bitmap.end()); + return next(m_bitmap.begin(), storage().offset()); } - auto struct_array::get_bitmap() const -> const_bitmap_range + auto struct_array::bitmap_begin_impl() const -> bitmap_type::const_iterator { - return const_bitmap_range(sparrow::next(m_bitmap.cbegin(), storage().offset()), m_bitmap.cend()); + return next(m_bitmap.begin(), storage().offset()); } } diff --git a/include/sparrow/layout/variable_size_binary_array.hpp b/include/sparrow/layout/variable_size_binary_array.hpp index a30c2d67..51c9cbb2 100644 --- a/include/sparrow/layout/variable_size_binary_array.hpp +++ b/include/sparrow/layout/variable_size_binary_array.hpp @@ -24,7 +24,6 @@ #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/nullable.hpp" - namespace sparrow { template @@ -223,10 +222,8 @@ namespace sparrow private: - bitmap_range get_bitmap(); - const_bitmap_range get_bitmap() const; - - static bitmap_type make_bitmap(arrow_proxy& arrow_proxy); + bitmap_type::iterator bitmap_begin_impl(); + bitmap_type::const_iterator bitmap_begin_impl() const; static constexpr size_t OFFSET_BUFFER_INDEX = 1; static constexpr size_t DATA_BUFFER_INDEX = 2; @@ -452,7 +449,7 @@ namespace sparrow variable_size_binary_array::variable_size_binary_array(arrow_proxy proxy) : array_base(proxy.data_type()) , base_type(std::move(proxy)) - , m_bitmap(make_bitmap(storage())) + , m_bitmap(make_simple_bitmap(storage())) { const auto type = storage().data_type(); SPARROW_ASSERT_TRUE(type == data_type::STRING || type == data_type::BINARY); // TODO: Add @@ -604,23 +601,14 @@ namespace sparrow } template - auto variable_size_binary_array::get_bitmap() -> bitmap_range - { - return bitmap_range(sparrow::next(m_bitmap.begin(), storage().offset()), m_bitmap.end()); - } - - template - auto variable_size_binary_array::get_bitmap() const -> const_bitmap_range + auto variable_size_binary_array::bitmap_begin_impl() -> bitmap_type::iterator { - return const_bitmap_range(sparrow::next(m_bitmap.cbegin(), storage().offset()), m_bitmap.end()); + return next(m_bitmap.begin(), storage().offset()); } template - auto variable_size_binary_array::make_bitmap(arrow_proxy& arrow_proxy) -> bitmap_type + auto variable_size_binary_array::bitmap_begin_impl() const -> bitmap_type::const_iterator { - constexpr size_t bitmap_buffer_index = 0; - SPARROW_ASSERT_TRUE(arrow_proxy.buffers().size() > bitmap_buffer_index); - const auto bitmap_size = arrow_proxy.length() + arrow_proxy.offset(); - return bitmap_type(arrow_proxy.buffers()[bitmap_buffer_index].data(), bitmap_size); + return next(m_bitmap.begin(), storage().offset()); } }