Skip to content

Commit

Permalink
Remove dictionary bitmap, create common function to create simple bitmap
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex-PLACET committed Oct 8, 2024
1 parent 7bfb98f commit ca5d541
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 249 deletions.
20 changes: 16 additions & 4 deletions include/sparrow/layout/array_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#pragma once

#include <cstdint>
#include <ranges>

#include "sparrow/arrow_array_schema_proxy.hpp"
Expand All @@ -24,6 +25,17 @@

namespace sparrow
{
/**
 * Builds a bitmap view over the first buffer of an arrow proxy.
 *
 * The view covers `offset() + length()` bits counted from the start of the
 * buffer, so it is not shifted by the proxy offset.
 *
 * @param arrow_proxy Proxy whose buffer at index 0 is viewed as a bitmap.
 *                    The proxy must expose at least one buffer (asserted).
 * @return A `dynamic_bitset_view` built from that buffer's data and the bit count.
 */
inline dynamic_bitset_view<uint8_t> make_simple_bitmap(arrow_proxy& arrow_proxy)
{
    constexpr size_t validity_buffer_pos = 0;
    SPARROW_ASSERT_TRUE(validity_buffer_pos < arrow_proxy.buffers().size());

    const auto bits = arrow_proxy.buffers()[validity_buffer_pos].data();
    const auto bit_count = arrow_proxy.length() + arrow_proxy.offset();
    return dynamic_bitset_view<uint8_t>{bits, bit_count};
}

/**
* Base class for array type erasure
*/
Expand Down Expand Up @@ -278,20 +290,20 @@ namespace sparrow
/**
 * Returns the bitmap entry of the element at position @p i.
 *
 * The entry is reached by advancing bitmap_begin() by @p i, so this function
 * only relies on the derived class through bitmap_begin().
 *
 * @param i Index of the element; must be lower than size() (asserted).
 * @return The bitmap reference for element @p i.
 */
auto array_crtp_base<D>::has_value(size_type i) -> bitmap_reference
{
    SPARROW_ASSERT_TRUE(i < size());
    return *sparrow::next(bitmap_begin(), i);
}

/**
 * Returns the bitmap entry of the element at position @p i (const overload).
 *
 * The entry is reached by advancing bitmap_begin() by @p i, so this function
 * only relies on the derived class through bitmap_begin().
 *
 * @param i Index of the element; must be lower than size() (asserted).
 * @return The constant bitmap reference for element @p i.
 */
template <class D>
auto array_crtp_base<D>::has_value(size_type i) const -> bitmap_const_reference
{
    SPARROW_ASSERT_TRUE(i < size());
    return *sparrow::next(bitmap_begin(), i);
}

/**
 * Returns an iterator to the beginning of the bitmap.
 *
 * The work is delegated to the derived class, which must provide a
 * non-const `bitmap_begin_impl()` member.
 */
template <class D>
auto array_crtp_base<D>::bitmap_begin() -> bitmap_iterator
{
    return derived_cast().bitmap_begin_impl();
}

template <class D>
Expand All @@ -303,7 +315,7 @@ namespace sparrow
/**
 * Returns a constant iterator to the beginning of the bitmap.
 *
 * The work is delegated to the derived class, which must provide a
 * const `bitmap_begin_impl()` member.
 */
template <class D>
auto array_crtp_base<D>::bitmap_begin() const -> const_bitmap_iterator
{
    return derived_cast().bitmap_begin_impl();
}

template <class D>
Expand Down
44 changes: 21 additions & 23 deletions include/sparrow/layout/dictionary_encoded_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,12 @@

#include "sparrow/arrow_array_schema_proxy.hpp"
#include "sparrow/layout/array_base.hpp"
#include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap.hpp"
#include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_bitmap_iterator.hpp"
#include "sparrow/layout/dictionary_encoded_array/dictionary_encoded_array_iterator.hpp"
#include "sparrow/layout/primitive_array.hpp"
#include "sparrow/types/data_type.hpp"
#include "sparrow/utils/contracts.hpp"


namespace sparrow
{
/*
Expand All @@ -44,6 +43,15 @@ namespace sparrow
static constexpr bool is_const = IC;
};

/**
 * Bundle of the type aliases describing the bitmap of a dictionary encoded array.
 *
 * Both reference types are plain `bool` values. The iterator types are
 * `validity_iterator` instantiations pairing the keys array type with one of
 * the two bitmap range types of the values array.
 *
 * @tparam KeysArray Keys array type forwarded to `validity_iterator`.
 * @tparam ValuesArrayBitmapRange Bitmap range type used for `iterator`.
 * @tparam ValuesArrayConstBitmapRange Bitmap range type used for `const_iterator`.
 */
template <class KeysArray, class ValuesArrayBitmapRange, class ValuesArrayConstBitmapRange>
struct dictionary_bitmap_types
{
    // Size and element access types.
    using size_type = size_t;
    using const_reference = bool;
    using reference = bool;

    // Iteration types.
    using const_iterator = validity_iterator<KeysArray, ValuesArrayConstBitmapRange>;
    using iterator = validity_iterator<KeysArray, ValuesArrayBitmapRange>;
};

template <std::integral IT, class SL, layout_offset OT = std::int64_t>
class dictionary_encoded_array;
Expand All @@ -68,7 +76,7 @@ namespace sparrow
using iterator = layout_iterator<array_type, false>;
using const_iterator = layout_iterator<array_type, true>;

using bitmap_type = dictionary_bitmap<keys_layout, typename values_layout::const_bitmap_range>;
using bitmap_type = dictionary_bitmap_types<keys_layout, typename values_layout::bitmap_range, typename values_layout::const_bitmap_range>;
};

template <std::integral IT, class SL, layout_offset OT>
Expand Down Expand Up @@ -129,15 +137,13 @@ namespace sparrow

keys_layout m_keys_layout;
values_layout m_values_layout;
bitmap_type m_bitmap;

bitmap_type& get_bitmap();
const bitmap_type& get_bitmap() const;
bitmap_type::iterator bitmap_begin_impl();
bitmap_type::const_iterator bitmap_begin_impl() const;

static const const_reference& dummy_const_reference();
static keys_layout create_keys_layout(arrow_proxy& proxy);
static values_layout create_values_layout(arrow_proxy& proxy);
bitmap_type make_bitmap(keys_layout& keys, values_layout& values);

friend class array_crtp_base<self_type>;
friend class dictionary_iterator<dictionary_value_traits<inner_types, true>>;
Expand All @@ -154,7 +160,6 @@ namespace sparrow
, base_type(std::move(proxy))
, m_keys_layout(create_keys_layout(storage()))
, m_values_layout(create_values_layout(storage()))
, m_bitmap(make_bitmap(m_keys_layout, m_values_layout))
{
SPARROW_ASSERT_TRUE(data_type_is_integer(storage().data_type()));
}
Expand Down Expand Up @@ -239,18 +244,6 @@ namespace sparrow
return values_layout{std::move(ar_dictionary)};
}

/**
 * Returns a mutable reference to the `m_bitmap` member.
 *
 * NOTE(review): this page shows a diff without +/- markers; this accessor and
 * the `m_bitmap` member it returns look like lines removed by the commit
 * ("Remove dictionary bitmap") - confirm before relying on it.
 */
template <std::integral IT, class SL, layout_offset OT>
auto dictionary_encoded_array<IT, SL, OT>::get_bitmap() -> bitmap_type&
{
return m_bitmap;
}

/**
 * Returns a constant reference to the `m_bitmap` member.
 *
 * NOTE(review): this page shows a diff without +/- markers; this accessor and
 * the `m_bitmap` member it returns look like lines removed by the commit
 * ("Remove dictionary bitmap") - confirm before relying on it.
 */
template <std::integral IT, class SL, layout_offset OT>
auto dictionary_encoded_array<IT, SL, OT>::get_bitmap() const -> const bitmap_type&
{
return m_bitmap;
}

template <std::integral IT, class SL, layout_offset OT>
auto dictionary_encoded_array<IT, SL, OT>::dummy_const_reference() -> const const_reference&
{
Expand All @@ -266,9 +259,14 @@ namespace sparrow
}

/**
 * Returns the bitmap iterator positioned at index 0.
 *
 * The iterator is built from the keys layout, the bitmap of the values layout
 * and the start index.
 *
 * NOTE(review): the iterator constructor visible in this change asserts
 * `index < keys.size()`, which would presumably fire here for an empty
 * array - confirm.
 */
template <std::integral IT, class SL, layout_offset OT>
auto dictionary_encoded_array<IT, SL, OT>::bitmap_begin_impl() -> bitmap_type::iterator
{
    return {m_keys_layout, m_values_layout.bitmap(), 0};
}

/**
 * Returns the constant bitmap iterator positioned at index 0.
 *
 * The iterator is built from the keys layout, the bitmap of the values layout
 * and the start index.
 *
 * NOTE(review): the iterator constructor visible in this change asserts
 * `index < keys.size()`, which would presumably fire here for an empty
 * array - confirm.
 */
template <std::integral IT, class SL, layout_offset OT>
auto dictionary_encoded_array<IT, SL, OT>::bitmap_begin_impl() const -> bitmap_type::const_iterator
{
    return {m_keys_layout, m_values_layout.bitmap(), 0};
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace sparrow
constexpr bool less_than(const self_type& rhs) const noexcept;

const index_array_type* m_keys_array;
const value_array_bitmap_range_type* m_value_array_bitmap;
value_array_bitmap_range_type m_value_array_bitmap;
size_type m_index;

friend class iterator_access;
Expand All @@ -72,7 +72,7 @@ namespace sparrow
size_type index
)
: m_keys_array(&index_array)
, m_value_array_bitmap(&value_bitmap)
, m_value_array_bitmap(value_bitmap)
, m_index(index)
{
SPARROW_ASSERT_TRUE(m_index < m_keys_array->size());
Expand All @@ -88,7 +88,7 @@ namespace sparrow
return false;
}
const auto key_value = key.value();
const bool value_validity = (*m_value_array_bitmap)[key_value];
const bool value_validity = m_value_array_bitmap[key_value];
return value_validity;
}

Expand Down
15 changes: 8 additions & 7 deletions include/sparrow/layout/list_layout/list_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@
#include "sparrow/types/data_traits.hpp"
#include "sparrow/utils/functor_index_iterator.hpp"
#include "sparrow/utils/iterator.hpp"
#include "sparrow/utils/nullable.hpp"
#include "sparrow/utils/memory.hpp"
#include "sparrow/utils/nullable.hpp"


namespace sparrow
{
Expand Down Expand Up @@ -99,8 +100,8 @@ namespace sparrow
inner_reference value(size_type i);
inner_const_reference value(size_type i) const;

bitmap_range get_bitmap();
const_bitmap_range get_bitmap() const;
bitmap_type::iterator bitmap_begin_impl();
bitmap_type::const_iterator bitmap_begin_impl() const;

// data members
flat_array_offset_type* p_list_offsets;
Expand Down Expand Up @@ -188,14 +189,14 @@ namespace sparrow
}

/**
 * Returns an iterator into `m_bitmap` advanced by the storage offset.
 *
 * `next` is qualified with `sparrow::` so that argument-dependent lookup
 * cannot bring another `next` overload into the candidate set.
 */
template <bool BIG>
auto list_array_impl<BIG>::bitmap_begin_impl() -> bitmap_type::iterator
{
    return sparrow::next(m_bitmap.begin(), this->storage().offset());
}

/**
 * Returns a constant iterator into `m_bitmap` advanced by the storage offset.
 *
 * `next` is qualified with `sparrow::` so that argument-dependent lookup
 * cannot bring another `next` overload into the candidate set.
 */
template <bool BIG>
auto list_array_impl<BIG>::bitmap_begin_impl() const -> bitmap_type::const_iterator
{
    return sparrow::next(m_bitmap.begin(), this->storage().offset());
}
}
Loading

0 comments on commit ca5d541

Please sign in to comment.