Skip to content

Commit

Permalink
add constexpr dataSizeBytes() to BitpackedUintVector
Browse files Browse the repository at this point in the history
  • Loading branch information
hurchalla committed May 30, 2024
1 parent c13ac53 commit be70ee0
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 36 deletions.
27 changes: 23 additions & 4 deletions include/hurchalla/util/BitpackedUintVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@
namespace hurchalla {


// This class is intended to provide the following, in order of importance:
// 1) A very large integer buffer (often expected to hold more than
//    1 billion integers) with minimal memory use, when all of the integers
//    are known to use only a limited number of bits (e.g. uint16_t values
//    that never use more than 13 bits).
// 2) Portable serialization and deserialization functions (i.e.
//    endianness and alignment are non-issues).
// 3) Excellent performance, with efficient utilization of CPU caches.
// Requirements 1 and 2 may acceptably cause some loss in performance, though
// read/write speed should be extremely good (perhaps near optimal) when
// access to memory or CPU cache is the bottleneck.


template <typename U, unsigned int element_bitlen>
struct BitpackedUintVector
{
Expand Down Expand Up @@ -58,8 +71,14 @@ struct BitpackedUintVector
// returns the maximum value that fits within element_bitlen bits.
HURCHALLA_FORCE_INLINE static constexpr U max_allowed_value()
{
return detail::
ImplBitpackedUintVector<U, element_bitlen>::max_allowed_value();
return decltype(impl_buv)::max_allowed_value();
}

// Static (instance-free) query: forwards to the static dataSizeBytes() of
// impl_buv's type to get the data size in bytes for element_count elements.
// Returns 0 if element_count is an invalid size.
HURCHALLA_FORCE_INLINE static constexpr
std::size_t dataSizeBytes(size_type element_count)
{
    using impl_type = decltype(impl_buv);
    return impl_type::dataSizeBytes(element_count);
}

HURCHALLA_FORCE_INLINE std::size_t dataSizeBytes() const
Expand All @@ -77,9 +96,9 @@ struct BitpackedUintVector
// Then later you or another person can call getFormatID() as part of a
// handshake to ensure matching format ID, prior to using that serialized
// data (for the BitpackedUintVector constructor).
HURCHALLA_FORCE_INLINE static constexpr uint64_t getFormatID()
HURCHALLA_FORCE_INLINE static constexpr uint32_t getFormatID()
{
return detail::ImplBitpackedUintVector<U,element_bitlen>::getFormatID();
return decltype(impl_buv)::getFormatID();
}

private:
Expand Down
66 changes: 37 additions & 29 deletions include/hurchalla/util/detail/ImplBitpackedUintVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,41 @@ struct ImplBitpackedUintVector
- 1 + (static_cast<U>(1) << (element_bitlen-1));
}

static constexpr uint64_t getFormatID()
static constexpr uint32_t getFormatID()
{
// The constant used below should never be changed (change the
// internal dataVersion if this file changes to use a different
// data format/data encoding). The exact constant value used below
// was just a random number, but it needs to stay constant.
return UINT64_C(6595123244124255148) + dataVersion;
return UINT32_C(2132441242) + dataVersion;
}

// Returns the number of bytes of storage needed to hold 'count' elements,
// each of size element_bitlen, plus one extra padding byte (see below).
// Returns 0 if count is an invalid size, i.e. if the location computation
// overflowed or the resulting byte count would not fit in std::size_t.
static constexpr
std::size_t dataSizeBytes(size_type count)
{
    // Every local is explicitly initialized: prior to C++20, a constexpr
    // function may not define a variable without an initializer, and doing
    // so would prevent this function from being usable in constant
    // expressions.
    std::size_t starting_byte = 0;
    std::size_t bit_offset = 0;
    bool overflowed = false;
    attemptGetLocationFromIndex(count, starting_byte, bit_offset, overflowed);
    if (overflowed)
        return 0;

    constexpr std::size_t MAXSIZET = std::numeric_limits<std::size_t>::max();
    std::size_t bytes_needed = starting_byte;
    if (bit_offset != 0) {
        HPBC_CONSTEXPR_ASSERT(bit_offset < 8);
        // A partially used final byte still occupies a whole byte.
        if (bytes_needed == MAXSIZET)
            return 0;
        ++bytes_needed;
    }
    // We want to allocate one byte more than we strictly need, so that
    // setAt() can safely write one byte beyond where it strictly should.
    if (bytes_needed == MAXSIZET)
        return 0;
    ++bytes_needed;
    return bytes_needed;
}

std::size_t dataSizeBytes() const
Expand All @@ -103,45 +131,25 @@ struct ImplBitpackedUintVector
// each of size element_bitlen.
static std::size_t getBytesFromCount(size_type count)
{
std::size_t starting_byte, bit_offset;
bool overflowed;
attemptGetLocationFromIndex(count, starting_byte, bit_offset, overflowed);
if (overflowed)
std::size_t bytes_needed = dataSizeBytes(count);
if (bytes_needed == 0)
throw std::length_error("BitpackedUintVector size too large, would overflow");

// Since attemptGetLocationFromIndex() returned without overflow, we
// know that any value <= 'count' can be converted into 'starting_byte'
// and 'bit_offset' correctly (i.e. without overflow). This lets us
// Since dataSizeBytes() returned without overflow, we know that any
// value <= 'count' can be converted into 'starting_byte' and
// 'bit_offset' correctly (i.e. without overflow). This lets us
// establish the class invariant that any index that satisfies
// (index < count) will be converted without overflow by
// attemptGetLocationFromIndex().

constexpr std::size_t MAXSIZET= std::numeric_limits<std::size_t>::max();

std::size_t bytes_needed = starting_byte;
if (bit_offset != 0) {
HPBC_ASSERT2(bit_offset < 8);
if (bytes_needed == MAXSIZET)
throw std::length_error("BitpackedUintVector size too large, would overflow");
else
++bytes_needed;
}

// We want to allocate one byte more than we strictly need, so that
// setAt() can safely write one byte beyond where it strictly should.
if (bytes_needed == MAXSIZET)
throw std::length_error("BitpackedUintVector size too large, would overflow");
else
++bytes_needed;

return bytes_needed;
}


// if this function returns with overflowed == false,
// then it guarantees any value <= 'index' can be converted into
// 'starting_byte' and 'bit_offset' correctly (i.e. without overflow).
HURCHALLA_FORCE_INLINE static
HURCHALLA_FORCE_INLINE static constexpr
void attemptGetLocationFromIndex(size_type index,
std::size_t& starting_byte, std::size_t& bit_offset, bool& overflowed)
{
Expand Down Expand Up @@ -191,7 +199,7 @@ struct ImplBitpackedUintVector
bit_offset = static_cast<std::size_t>(
(static_cast<P>(index) * ELEM_BITLEN) % 8);
#endif
HPBC_POSTCONDITION2(bit_offset < 8);
HPBC_CONSTEXPR_POSTCONDITION(bit_offset < 8);
}


Expand Down
2 changes: 1 addition & 1 deletion msvc_build_tests.bat
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ REM cmake -S. -B.\%build_dir% -DTEST_HURCHALLA_LIBS=ON -G "Visual Studio 15"
REM the above line appears to build x86-32. To get x64:
REM cmake -S. -B.\%build_dir% -DTEST_HURCHALLA_LIBS=ON -G "Visual Studio 15 2017 Win64"

cmake -DTEST_HURCHALLA_LIBS=ON -S. -B.\%build_dir%
cmake -S. -B.\%build_dir% -DTEST_HURCHALLA_LIBS=ON -G "Visual Studio 17 2022" -A Win32
if %errorlevel% neq 0 exit /b %errorlevel%
cmake --build .\%build_dir% --config Release
if %errorlevel% neq 0 exit /b %errorlevel%
Expand Down
8 changes: 6 additions & 2 deletions test/test_BitpackedUintVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ void check_buv(std::vector<uint64_t>& vec)
EXPECT_TRUE(buv.max_allowed_value() == mask);

// nothing to test with getFormatID() except to check it compiles
uint64_t id = buv.getFormatID();
constexpr uint32_t id = buv.getFormatID();
(void)id;

for (size_type i = 0; i < vec.size(); ++i) {
Expand All @@ -47,8 +47,12 @@ void check_buv(std::vector<uint64_t>& vec)

// serialize
const unsigned char* data = buv.data();
std::size_t num_bytes = buv.dataSizeBytes();
size_type num_elements = buv.size();
std::size_t num_bytes = buv.dataSizeBytes();

// side test that the constexpr dataSizeBytes() is valid
std::size_t num_bytes2 = decltype(buv)::dataSizeBytes(num_elements);
EXPECT_TRUE(num_bytes == num_bytes2);

// use memcpy to roughly fake write/read to file, network, etc.
std::unique_ptr<unsigned char[]> buffer(new unsigned char[num_bytes]());
Expand Down

0 comments on commit be70ee0

Please sign in to comment.