add constexpr dataSizeBytes() to BitpackedUintVector

hurchalla · May 30, 2024 · be70ee0 · be70ee0
1 parent c13ac53
commit be70ee0
Show file tree

Hide file tree

Showing 4 changed files with 67 additions and 36 deletions.
diff --git a/include/hurchalla/util/BitpackedUintVector.h b/include/hurchalla/util/BitpackedUintVector.h
@@ -14,6 +14,19 @@
 namespace hurchalla {
 
 
+// This class is intended to provide the following, in order of importance:
+// 1) Provides a very large integer buffer (often expected to be above
+//    1 billion integers) with minimal memory use, when all of the integers
+//    are known to use only a limited number of bits (e.g. uint16_t values
+//    that never use more than 13 bits).
+// 2) Provides portable serialization and deserialization functions (i.e.
+//    endianness and alignment are non-issues).
+// 3) Excellent performance, with efficient utilization of CPU caches.
+//    Requirements 1 and 2 may acceptably cause loss in performance, though
+//    read/write speed should be extremely good (perhaps near optimal) when
+//    access to memory or CPU cache is the bottleneck.
+
+
 template <typename U, unsigned int element_bitlen>
 struct BitpackedUintVector
 {
@@ -58,8 +71,14 @@ struct BitpackedUintVector
     // returns the maximum value that fits within element_bitlen bits.
     HURCHALLA_FORCE_INLINE static constexpr U max_allowed_value()
     {
-        return detail::
-                ImplBitpackedUintVector<U, element_bitlen>::max_allowed_value();
+        return decltype(impl_buv)::max_allowed_value();
+    }
+
+    // returns bytes needed for element_count elements; 0 if it would overflow
+    HURCHALLA_FORCE_INLINE static constexpr
+    std::size_t dataSizeBytes(size_type element_count)
+    {
+        return decltype(impl_buv)::dataSizeBytes(element_count);
     }
 
     HURCHALLA_FORCE_INLINE std::size_t dataSizeBytes() const
@@ -77,9 +96,9 @@ struct BitpackedUintVector
     // Then later you or another person can call getFormatID() as part of a
     // handshake to ensure matching format ID, prior to using that serialized
     // data (for the BitpackedUintVector constructor).
-    HURCHALLA_FORCE_INLINE static constexpr uint64_t getFormatID()
+    HURCHALLA_FORCE_INLINE static constexpr uint32_t getFormatID()
     {
-        return detail::ImplBitpackedUintVector<U,element_bitlen>::getFormatID();
+        return decltype(impl_buv)::getFormatID();
     }
 
 private:

diff --git a/include/hurchalla/util/detail/ImplBitpackedUintVector.h b/include/hurchalla/util/detail/ImplBitpackedUintVector.h
@@ -78,13 +78,41 @@ struct ImplBitpackedUintVector
                 - 1 + (static_cast<U>(1) << (element_bitlen-1));
     }
 
-    static constexpr uint64_t getFormatID()
+    static constexpr uint32_t getFormatID()
     {
         // The constant used below should never be changed (change the
         // internal dataVersion if this file changes to use a different
         // data format/data encoding).  The exact constant value used below
         // was just a random number, but it needs to stay constant.
-        return UINT64_C(6595123244124255148) + dataVersion;
+        return UINT32_C(2132441242) + dataVersion;
+    }
+
+    // returns bytes needed for count elements (+1 spare byte); 0 on overflow
+    static constexpr
+    std::size_t dataSizeBytes(size_type count)
+    {
+        // initialized because C++14/17 constexpr forbids uninitialized locals
+        std::size_t starting_byte = 0, bit_offset = 0;
+        bool overflowed = false;
+        attemptGetLocationFromIndex(count, starting_byte, bit_offset, overflowed);
+        if (overflowed)
+            return 0;
+
+        constexpr std::size_t MAXSIZET= std::numeric_limits<std::size_t>::max();
+        std::size_t bytes_needed = starting_byte;
+        if (bit_offset != 0) {
+            // total bit length is not a whole number of bytes, so round up
+            HPBC_CONSTEXPR_ASSERT(bit_offset < 8);
+            if (bytes_needed == MAXSIZET)
+                return 0;
+            ++bytes_needed;
+        }
+        // We want to allocate one byte more than we strictly need, so that
+        // setAt() can safely write one byte beyond where it strictly should.
+        if (bytes_needed == MAXSIZET)
+            return 0;
+        ++bytes_needed;
+        return bytes_needed;
     }
 
     std::size_t dataSizeBytes() const
@@ -103,45 +131,25 @@ struct ImplBitpackedUintVector
     // each of size element_bitlen.
     static std::size_t getBytesFromCount(size_type count)
     {
-        std::size_t starting_byte, bit_offset;
-        bool overflowed;
-        attemptGetLocationFromIndex(count, starting_byte, bit_offset, overflowed);
-        if (overflowed)
+        std::size_t bytes_needed = dataSizeBytes(count);
+        if (bytes_needed == 0)
            throw std::length_error("BitpackedUintVector size too large, would overflow");
 
-        // Since attemptGetLocationFromIndex() returned without overflow, we
-        // know that any value <= 'count' can be converted into 'starting_byte'
-        // and 'bit_offset' correctly (i.e. without overflow).  This lets us
+        // Since dataSizeBytes() returned without overflow, we know that any
+        // value <= 'count' can be converted into 'starting_byte' and
+        // 'bit_offset' correctly (i.e. without overflow).  This lets us
         // establish the class invariant that any index that satisfies
         // (index < count) will be converted without overflow by
         // attemptGetLocationFromIndex().
 
-        constexpr std::size_t MAXSIZET= std::numeric_limits<std::size_t>::max();
-
-        std::size_t bytes_needed = starting_byte;
-        if (bit_offset != 0) {
-            HPBC_ASSERT2(bit_offset < 8);
-            if (bytes_needed == MAXSIZET)
-                throw std::length_error("BitpackedUintVector size too large, would overflow");
-            else
-                ++bytes_needed;
-        }
-
-        // We want to allocate one byte more than we strictly need, so that
-        // setAt() can safely write one byte beyond where it strictly should.
-        if (bytes_needed == MAXSIZET)
-            throw std::length_error("BitpackedUintVector size too large, would overflow");
-        else
-            ++bytes_needed;
-
         return bytes_needed;
     }
 
 
     // if this function returns with overflowed == false,
     // then it guarantees any value <= 'index' can be converted into
     // 'starting_byte' and 'bit_offset' correctly (i.e. without overflow).
-    HURCHALLA_FORCE_INLINE static
+    HURCHALLA_FORCE_INLINE static constexpr
     void attemptGetLocationFromIndex(size_type index,
           std::size_t& starting_byte, std::size_t& bit_offset, bool& overflowed)
     {
@@ -191,7 +199,7 @@ struct ImplBitpackedUintVector
         bit_offset = static_cast<std::size_t>(
                                      (static_cast<P>(index) * ELEM_BITLEN) % 8);
 #endif
-        HPBC_POSTCONDITION2(bit_offset < 8);
+        HPBC_CONSTEXPR_POSTCONDITION(bit_offset < 8);
     }
 
 

diff --git a/msvc_build_tests.bat b/msvc_build_tests.bat
@@ -8,7 +8,7 @@ REM cmake -S. -B.\%build_dir% -DTEST_HURCHALLA_LIBS=ON -G "Visual Studio 15"
 REM the above line appears to build x86-32.  To get x64:
 REM cmake -S. -B.\%build_dir% -DTEST_HURCHALLA_LIBS=ON -G "Visual Studio 15 2017 Win64"
 
-cmake -DTEST_HURCHALLA_LIBS=ON -S. -B.\%build_dir%
+cmake -S. -B.\%build_dir% -DTEST_HURCHALLA_LIBS=ON -G "Visual Studio 17 2022" -A Win32
 if %errorlevel% neq 0 exit /b %errorlevel%
 cmake --build .\%build_dir% --config Release
 if %errorlevel% neq 0 exit /b %errorlevel%

diff --git a/test/test_BitpackedUintVector.cpp b/test/test_BitpackedUintVector.cpp
@@ -30,7 +30,7 @@ void check_buv(std::vector<uint64_t>& vec)
     EXPECT_TRUE(buv.max_allowed_value() == mask);
 
     // nothing to test with getFormatID() except to check it compiles
-    uint64_t id = buv.getFormatID();
+    constexpr uint32_t id = buv.getFormatID();
     (void)id;
 
     for (size_type i = 0; i < vec.size(); ++i) {
@@ -47,8 +47,12 @@ void check_buv(std::vector<uint64_t>& vec)
 
     // serialize
     const unsigned char* data = buv.data();
-    std::size_t num_bytes = buv.dataSizeBytes();
     size_type num_elements = buv.size();
+    std::size_t num_bytes = buv.dataSizeBytes();
+
+    // side test: the static dataSizeBytes(count) must match dataSizeBytes()
+    std::size_t num_bytes2 = decltype(buv)::dataSizeBytes(num_elements);
+    EXPECT_TRUE(num_bytes == num_bytes2);
 
     // use memcpy to roughly fake write/read to file, network, etc.
     std::unique_ptr<unsigned char[]> buffer(new unsigned char[num_bytes]());