Skip to content

Commit

Permalink
Fixed copy semantic of union layouts
Browse files Browse the repository at this point in the history
  • Loading branch information
JohanMabille committed Oct 22, 2024
1 parent b71a304 commit 0559985
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 118 deletions.
56 changes: 25 additions & 31 deletions include/sparrow/layout/layout_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,45 +18,31 @@

namespace sparrow::detail
{

// Shared state for layout functors: a raw pointer to the layout they read from.
// Nothing in this class deletes the pointer; it is only stored and copied.
template<class LAYOUT_TYPE>
class layout_functor_base
{
public:

    using layout_type = LAYOUT_TYPE;

    // Default construction leaves p_layout null (see its in-class initializer).
    constexpr layout_functor_base() = default;

    // Binds the functor to `layout`; the pointer is stored exactly as given.
    constexpr layout_functor_base(layout_type* layout)
        : p_layout{layout}
    {
    }

    // Copy and move simply transfer the stored pointer value.
    constexpr layout_functor_base(const layout_functor_base&) = default;
    constexpr layout_functor_base(layout_functor_base&&) = default;
    constexpr layout_functor_base& operator=(const layout_functor_base&) = default;
    constexpr layout_functor_base& operator=(layout_functor_base&&) = default;

protected:

    // Layout accessed by derived functors; null until one is supplied.
    layout_type* p_layout = nullptr;
};


// Functor to get the value of the layout at index i.
//
// This is useful to create an iterator over the values of a layout.
// This functor will be passed to the functor_index_iterator.
template<class LAYOUT_TYPE, class VALUE_TYPE>
class layout_value_functor
{
public:

    using value_type = VALUE_TYPE;
    using layout_type = LAYOUT_TYPE;

    // Binds the functor to `layout`. The pointer is stored as given and may be
    // null (the default); nothing in this class deletes it.
    constexpr explicit layout_value_functor(layout_type* layout = nullptr)
        : p_layout(layout)
    {
    }

    // Returns p_layout->value(i), converted to value_type.
    // p_layout is dereferenced without a null check, so the functor must have
    // been bound to a layout before it is called.
    value_type operator()(std::size_t i) const
    {
        return p_layout->value(i);
    }

private:

    // Layout whose value(i) is forwarded by operator().
    layout_type* p_layout;
};


Expand All @@ -65,18 +51,26 @@ namespace sparrow::detail
// This is useful to create an iterator over the nullable-values of a layout.
// This functor will be passed to the functor_index_iterator.
template<class LAYOUT_TYPE, class VALUE_TYPE>
class layout_bracket_functor
{
public:

    using value_type = VALUE_TYPE;
    using layout_type = LAYOUT_TYPE;

    // Binds the functor to `layout`. The pointer is stored as given and may be
    // null (the default); nothing in this class deletes it.
    constexpr explicit layout_bracket_functor(layout_type* layout = nullptr)
        : p_layout(layout)
    {
    }

    // Returns (*p_layout)[i], converted to value_type.
    // p_layout is dereferenced without a null check, so the functor must have
    // been bound to a layout before it is called.
    value_type operator()(std::size_t i) const
    {
        return p_layout->operator[](i);
    }

private:

    // Layout whose operator[] is forwarded by operator().
    layout_type* p_layout;
};

}; // namespace sparrow::detail
124 changes: 105 additions & 19 deletions include/sparrow/layout/union_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,9 @@

namespace sparrow
{

class dense_union_array;
class sparse_union_array;


namespace detail
{
template<class T>
Expand Down Expand Up @@ -60,13 +58,16 @@ namespace sparrow
class union_array_crtp_base : public crtp_base<DERIVED>
{
public:

using self_type = union_array_crtp_base<DERIVED>;
using derived_type = DERIVED;
using inner_value_type = array_traits::inner_value_type;
using value_type = array_traits::const_reference;
using iterator = functor_index_iterator<detail::layout_bracket_functor<derived_type, value_type>>;
using const_iterator = functor_index_iterator<detail::layout_bracket_functor<const derived_type, value_type>>;
using functor_type = detail::layout_bracket_functor<derived_type, value_type>;
using const_functor_type = detail::layout_bracket_functor<const derived_type, value_type>;
using iterator = functor_index_iterator<functor_type>;
using const_iterator = functor_index_iterator<const_functor_type>;

explicit union_array_crtp_base(arrow_proxy proxy);
value_type operator[](std::size_t i) const;
value_type operator[](std::size_t i);

Expand All @@ -80,14 +81,22 @@ namespace sparrow
const_iterator cend() const;

protected:

using type_id_map = std::array<std::uint8_t, 256>;
static type_id_map parse_type_id_map(std::string_view format_string);

using children_type = std::vector<cloning_ptr<array_wrapper>>;
children_type make_children(arrow_proxy& proxy);

explicit union_array_crtp_base(arrow_proxy proxy);
union_array_crtp_base(const self_type& rhs);
self_type& operator=(const self_type& rhs);

arrow_proxy& get_arrow_proxy();

arrow_proxy m_proxy;
const std::uint8_t * p_type_ids;
std::vector<cloning_ptr<array_wrapper>> m_children;
children_type m_children;

// map from type-id to child-index
std::array<std::uint8_t, 256> m_type_id_map;
Expand All @@ -96,10 +105,19 @@ namespace sparrow
friend class array_wrapper_impl;
};

// Equality between two union arrays sharing the same derived type D.
// The definition further down in this header delegates to std::ranges::equal
// over the two arrays.
template <class D>
bool operator==(const union_array_crtp_base<D>& lhs, const union_array_crtp_base<D>& rhs);

class dense_union_array : public union_array_crtp_base<dense_union_array>
{
public:

using base_type = union_array_crtp_base<dense_union_array>;

explicit dense_union_array(arrow_proxy proxy);
dense_union_array(const dense_union_array& rhs);
dense_union_array& operator=(const dense_union_array& rhs);

private:
std::size_t element_offset(std::size_t i) const;
const std::int32_t * p_offsets;
Expand All @@ -109,7 +127,11 @@ namespace sparrow
class sparse_union_array : public union_array_crtp_base<sparse_union_array>
{
public:
using union_array_crtp_base<sparse_union_array>::union_array_crtp_base;

using base_type = union_array_crtp_base<sparse_union_array>;

explicit sparse_union_array(arrow_proxy proxy);

private:
std::size_t element_offset(std::size_t i) const;
friend class union_array_crtp_base<sparse_union_array>;
Expand All @@ -132,6 +154,10 @@ namespace sparrow
return ret;
}

/****************************************
* union_array_crtp_base implementation *
****************************************/

template <class DERIVED>
arrow_proxy& union_array_crtp_base<DERIVED>::get_arrow_proxy()
{
Expand All @@ -140,15 +166,30 @@ namespace sparrow

// Builds the array from `proxy`, which is moved into m_proxy.
// Every other member is then derived from m_proxy (not from the moved-from
// parameter): the type-id pointer from buffer 0, one wrapper per child via
// make_children, and the type-id map parsed from the format string.
// m_proxy is declared before the other members, so it is initialized first.
template <class DERIVED>
union_array_crtp_base<DERIVED>::union_array_crtp_base(arrow_proxy proxy)
    : m_proxy(std::move(proxy))
    , p_type_ids(reinterpret_cast<std::uint8_t*>(m_proxy.buffers()[0/*index of type-ids*/].data()))
    , m_children(make_children(m_proxy))
    , m_type_id_map(parse_type_id_map(m_proxy.format()))
{
}

// Copy construction: delegates to the arrow_proxy constructor with a copy of
// rhs.m_proxy, so every cached member is rebuilt from this object's own proxy.
template <class DERIVED>
union_array_crtp_base<DERIVED>::union_array_crtp_base(const self_type& rhs)
    : union_array_crtp_base(rhs.m_proxy)
{
}

// Copy assignment.
// Copies rhs.m_proxy, then recomputes every cached member from this object's
// own m_proxy rather than copying the cached values held by rhs.
// Self-assignment is a no-op. Returns *this.
template <class DERIVED>
auto union_array_crtp_base<DERIVED>::operator=(const self_type& rhs) -> self_type&
{
    if (this != &rhs)
    {
        m_proxy = rhs.m_proxy;
        // The remaining members all read from the freshly assigned m_proxy,
        // so they must be refreshed after it.
        p_type_ids = reinterpret_cast<std::uint8_t*>(m_proxy.buffers()[0/*index of type-ids*/].data());
        m_children = make_children(m_proxy);
        m_type_id_map = parse_type_id_map(m_proxy.format());
    }
    return *this;
}

template <class DERIVED>
Expand All @@ -175,13 +216,13 @@ namespace sparrow
// Returns an iterator positioned at index 0.
// The functor is bound to the derived object (via derived_cast), because
// functor_type is parameterized on derived_type rather than on this base.
template <class DERIVED>
auto union_array_crtp_base<DERIVED>::begin() -> iterator
{
    return iterator(functor_type{&(this->derived_cast())}, 0);
}

// Returns the past-the-end iterator, positioned at index size().
// The functor is bound to the derived object (via derived_cast), because
// functor_type is parameterized on derived_type rather than on this base.
template <class DERIVED>
auto union_array_crtp_base<DERIVED>::end() -> iterator
{
    return iterator(functor_type{&(this->derived_cast())}, this->size());
}

template <class DERIVED>
Expand All @@ -199,25 +240,61 @@ namespace sparrow
// Returns a const iterator positioned at index 0.
// The functor is bound to the derived object (via derived_cast), because
// const_functor_type is parameterized on const derived_type.
template <class DERIVED>
auto union_array_crtp_base<DERIVED>::cbegin() const -> const_iterator
{
    return const_iterator(const_functor_type{&(this->derived_cast())}, 0);
}

// Returns the past-the-end const iterator, positioned at index size().
// The functor is bound to the derived object (via derived_cast), because
// const_functor_type is parameterized on const derived_type.
template <class DERIVED>
auto union_array_crtp_base<DERIVED>::cend() const -> const_iterator
{
    return const_iterator(const_functor_type{&(this->derived_cast())}, this->size());
}

// Builds one array wrapper per child of `proxy`.
// Each slot is produced by array_factory from a view of the matching child
// proxy; the resulting vector has exactly proxy.children().size() entries.
template <class DERIVED>
auto union_array_crtp_base<DERIVED>::make_children(arrow_proxy& proxy) -> children_type
{
    children_type result(proxy.children().size(), nullptr);
    std::size_t idx = 0;
    while (idx < result.size())
    {
        result[idx] = array_factory(proxy.children()[idx].view());
        ++idx;
    }
    return result;
}

// Two union arrays of the same derived type compare equal when
// std::ranges::equal reports that their element sequences match.
template <class D>
bool operator==(const union_array_crtp_base<D>& lhs, const union_array_crtp_base<D>& rhs)
{
    const bool same_elements = std::ranges::equal(lhs, rhs);
    return same_elements;
}

/************************************
* dense_union_array implementation *
************************************/

#ifdef __GNUC__
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wcast-align"
#endif
// Builds a dense union array from `proxy`.
// The proxy is moved into the base class; the offsets pointer is then read
// from buffer 1 of the base's m_proxy, since `proxy` itself has been moved from.
inline dense_union_array::dense_union_array(arrow_proxy proxy)
    : base_type(std::move(proxy))
    , p_offsets(reinterpret_cast<std::int32_t*>(m_proxy.buffers()[1/*index of offsets*/].data()))
{
}

// Copy construction: delegates to the arrow_proxy constructor with a copy of
// rhs.m_proxy, so p_offsets is derived from this object's own proxy.
inline dense_union_array::dense_union_array(const dense_union_array& rhs)
    : dense_union_array(rhs.m_proxy)
{}

// Copy assignment.
// Lets the base class copy its state first, then re-reads the offsets pointer
// from buffer 1 of this object's m_proxy. Self-assignment is a no-op.
inline dense_union_array& dense_union_array::operator=(const dense_union_array& rhs)
{
    if (this == &rhs)
    {
        return *this;
    }

    base_type::operator=(rhs);
    p_offsets = reinterpret_cast<std::int32_t*>(m_proxy.buffers()[1/*index of offsets*/].data());
    return *this;
}

#ifdef __GNUC__
# pragma GCC diagnostic pop
#endif
Expand All @@ -227,6 +304,15 @@ namespace sparrow
return static_cast<std::size_t>(p_offsets[i]) + m_proxy.offset();
}

/*************************************
* sparse_union_array implementation *
*************************************/

// Builds a sparse union array from `proxy`; all state lives in the base
// class, which receives the proxy by move.
inline sparse_union_array::sparse_union_array(arrow_proxy proxy)
    : base_type(std::move(proxy))
{}

inline std::size_t sparse_union_array::element_offset(std::size_t i) const
{
return i + m_proxy.offset();
Expand Down
Loading

0 comments on commit 0559985

Please sign in to comment.