diff --git a/src/test/movinginterquartilemean_test.cpp b/src/test/movinginterquartilemean_test.cpp index 797c1136d4e..dc0961c866f 100644 --- a/src/test/movinginterquartilemean_test.cpp +++ b/src/test/movinginterquartilemean_test.cpp @@ -1,7 +1,10 @@ +#include "util/movinginterquartilemean.h" + +#include #include -#include -#include "util/movinginterquartilemean.h" +#include +#include namespace { @@ -109,4 +112,28 @@ TEST_F(MovingInterquartileMeanTest, doubles9) { } } +void BM_MovingIQM_Insertion(benchmark::State& state) { + std::mt19937 gen; // explicitly don't seed for reproducibility + std::uniform_real_distribution<> dis(0.0, 1.0); + // first quarter of the test will be plain insertion + // the remaining three quarters will evict the oldest value + std::size_t num_iters = state.range(0) * 4; + for (auto _ : state) { + MovingInterquartileMean iqm(state.range(0)); + for (std::size_t i = 0; i < num_iters; ++i) { + benchmark::DoNotOptimize(iqm.insert(dis(gen))); + } + } + state.SetItemsProcessed(state.iterations() * num_iters); +} + +BENCHMARK(BM_MovingIQM_Insertion) + ->RangeMultiplier(2) + ->Range(1 << 1, 1 << 10) + // the benchmark is so slow, it usually only gets a single iteration + // each, so manually force a couple more + ->Repetitions(100) + // don't spam the output with the individual repetition data + ->DisplayAggregatesOnly() + ->Unit(benchmark::kMicrosecond); } // namespace diff --git a/src/util/movinginterquartilemean.cpp b/src/util/movinginterquartilemean.cpp index a878e1ae4b3..d1952270e57 100644 --- a/src/util/movinginterquartilemean.cpp +++ b/src/util/movinginterquartilemean.cpp @@ -1,46 +1,44 @@ #include "movinginterquartilemean.h" -MovingInterquartileMean::MovingInterquartileMean(const unsigned int listMaxSize) - : m_dMean(0.0), - m_iListMaxSize(listMaxSize), - m_bChanged(true) { -} +#include -MovingInterquartileMean::~MovingInterquartileMean() {}; +#include +#include +#include +#include -double MovingInterquartileMean::insert(double value) { - 
m_bChanged = true; +#include "util/assert.h" - // Insert new value - if (m_list.empty()) { - m_list.push_front(value); - m_queue.enqueue(m_list.begin()); - } else if (value < m_list.front()) { - m_list.push_front(value); - m_queue.enqueue(m_list.begin()); - } else if (value >= m_list.back()) { - m_list.push_back(value); - m_queue.enqueue(--m_list.end()); - } else { - std::list::iterator it = m_list.begin()++; - while (value >= *it) { - ++it; - } - m_queue.enqueue(m_list.insert(it, value)); - // (If value already exists in the list, the new instance - // is appended next to the old ones: 2·-> 1 2 3 = 1 2 2· 3) +double MovingInterquartileMean::insert(double value) { + // make space if needed (the reserved capacity of m_list is what bounds the window) + // NOTE: after benchmarking, replacing the erase+insert with a rotate+swap does + // not result in significant enough speedup to warrant the complexity. + if (m_list.size() == m_list.capacity()) { + m_list.erase(std::lower_bound(m_list.begin(), m_list.end(), m_queue.front())); + m_queue.pop(); } + auto insertPosition = std::lower_bound(m_list.cbegin(), m_list.cend(), value); + m_list.insert(insertPosition, value); + // we explicitly insert the value and not an index or iterator here, + // because those would get invalidated when the contents of m_list are + // shifted around (due to the erase and insert above). updating those + // iterators/indices is likely more expensive than recovering them when + // needed using the first std::lower_bound + m_queue.push(value); - // If list was already full, delete the oldest value: - if (m_list.size() == static_cast(m_iListMaxSize + 1)) { - m_list.erase(m_queue.dequeue()); - } - return mean(); + DEBUG_ASSERT(std::is_sorted(m_list.cbegin(), m_list.cend())); + + // no need to set m_bChanged and check m_list.empty(). 
+ // we know the preconditions are satisfied so call `calcMean()` directly + m_dMean = calcMean(); + return m_dMean; } void MovingInterquartileMean::clear() { m_bChanged = true; - m_queue.clear(); + // std::queue has no .clear(), so creating a temporary and std::swap is the + // next most elegant solution + std::queue().swap(m_queue); m_list.clear(); } @@ -48,48 +46,40 @@ double MovingInterquartileMean::mean() { if (!m_bChanged || m_list.empty()) { return m_dMean; } - + m_dMean = calcMean(); m_bChanged = false; - const int listSize = size(); + return m_dMean; +} + +double MovingInterquartileMean::calcMean() const { + // assumes m_list is not empty + auto simpleMean = [](auto begin, auto end) -> double { + double size = std::distance(begin, end); + return std::accumulate(begin, end, 0.0) / size; + }; + + const auto listSize = m_list.size(); if (listSize <= 4) { - double d_sum = 0; - for (const double d : std::as_const(m_list)) { - d_sum += d; - } - m_dMean = d_sum / listSize; + return simpleMean(m_list.cbegin(), m_list.cend()); } else if (listSize % 4 == 0) { - int quartileSize = listSize / 4; - double interQuartileRange = 2 * quartileSize; - double d_sum = 0; - std::list::iterator it = m_list.begin(); - std::advance(it, quartileSize); - for (int k = 0; k < 2 * quartileSize; ++k, ++it) { - d_sum += *it; - } - m_dMean = d_sum / interQuartileRange; + std::size_t quartileSize = listSize / 4; + auto start = m_list.cbegin() + quartileSize; + auto end = m_list.cend() - quartileSize; + return simpleMean(start, end); } else { // http://en.wikipedia.org/wiki/Interquartile_mean#Dataset_not_divisible_by_four double quartileSize = listSize / 4.0; double interQuartileRange = 2 * quartileSize; - int nFullValues = listSize - 2 * static_cast(quartileSize) - 2; + std::size_t nFullValues = listSize - 2 * static_cast(quartileSize) - 2; double quartileWeight = (interQuartileRange - nFullValues) / 2; - std::list::iterator it = m_list.begin(); - std::advance(it, 
static_cast(quartileSize)); + auto it = m_list.begin(); + std::advance(it, static_cast(quartileSize)); double d_sum = *it * quartileWeight; ++it; - for (int k = 0; k < nFullValues; ++k, ++it) { + for (std::size_t k = 0; k < nFullValues; ++k, ++it) { d_sum += *it; } d_sum += *it * quartileWeight; - m_dMean = d_sum / interQuartileRange; + return d_sum / interQuartileRange; } - return m_dMean; -} - -int MovingInterquartileMean::size() const { - return static_cast(m_list.size()); -} - -int MovingInterquartileMean::listMaxSize() const { - return m_iListMaxSize; } diff --git a/src/util/movinginterquartilemean.h b/src/util/movinginterquartilemean.h index 1d4c668ca54..434232442f6 100644 --- a/src/util/movinginterquartilemean.h +++ b/src/util/movinginterquartilemean.h @@ -1,7 +1,7 @@ #pragma once -#include -#include +#include +#include // Truncated Interquartile mean @@ -13,8 +13,11 @@ class MovingInterquartileMean { public: // Constructs an empty MovingTruncatedIQM. - MovingInterquartileMean(const unsigned int listLength); - virtual ~MovingInterquartileMean(); + MovingInterquartileMean(std::size_t listLength) + : m_dMean(0.0), + m_bChanged(true) { + m_list.reserve(listLength); + } // Inserts value to the list and returns the new truncated mean. double insert(double value); @@ -23,18 +26,19 @@ class MovingInterquartileMean { // Returns the current truncated mean. Input list must not be empty. double mean(); // Returns how many values have been input. - int size() const; - // Returns the maximum size of the input list. - int listMaxSize() const; + int size() const { + return static_cast(m_list.size()); + } private: - double m_dMean; - int m_iListMaxSize; + double calcMean() const; // The list keeps input doubles ordered by value. - std::list m_list; - // The queue keeps pointers to doubles in the list ordered - // by the order they were received. - QQueue::iterator> m_queue; + std::vector m_list; + // The queue keeps a second copy of the list, but in insertion + // order. 
This is to track which value we need to evict in order + // to stay within memory constraints. + std::queue m_queue; + double m_dMean; // sum() checks this to know if it has to recalculate the mean. bool m_bChanged;