Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: MovingInterquartileMean #13730

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions src/test/movinginterquartilemean_test.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#include "util/movinginterquartilemean.h"

#include <benchmark/benchmark.h>
#include <gtest/gtest.h>
#include <QDebug>

#include "util/movinginterquartilemean.h"
#include <QDebug>
#include <random>

namespace {

Expand Down Expand Up @@ -109,4 +112,28 @@ TEST_F(MovingInterquartileMeanTest, doubles9) {
}
}

void BM_MovingIQM_Insertion(benchmark::State& state) {
std::mt19937 gen; // explicitly don't seed for reproducibility
JoergAtGithub marked this conversation as resolved.
Show resolved Hide resolved
std::uniform_real_distribution<> dis(0.0, 1.0);
// first quarter of the test will be plain insertion
// the remaining three quarters will evict the oldest value
std::size_t num_iters = state.range(0) * 4;
for (auto _ : state) {
MovingInterquartileMean iqm(state.range(0));
for (double i = 0; i < num_iters; ++i) {
benchmark::DoNotOptimize(iqm.insert(dis(gen)));
}
}
state.SetItemsProcessed(state.iterations() * num_iters);
}

BENCHMARK(BM_MovingIQM_Insertion)
->RangeMultiplier(2)
->Range(1 << 1, 1 << 10)
// the benchmark is so slow, it usually only gets a single iteration
// each, so manually force a couple more
->Repetitions(100)
// don't spam the output with the individual repetition data
->DisplayAggregatesOnly()
->Unit(benchmark::kMicrosecond);
} // namespace
103 changes: 45 additions & 58 deletions src/util/movinginterquartilemean.cpp
Original file line number Diff line number Diff line change
@@ -1,46 +1,44 @@
#include "movinginterquartilemean.h"

MovingInterquartileMean::MovingInterquartileMean(const unsigned int listMaxSize)
: m_dMean(0.0),
m_iListMaxSize(listMaxSize),
m_bChanged(true) {
}
#include <qlogging.h>

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <numeric>

MovingInterquartileMean::~MovingInterquartileMean() {};
#include "util/assert.h"

double MovingInterquartileMean::insert(double value) {
// make space if needed
// TODO (Swiftb0y): benchmark and see if it makes sense to optimize here by instead of
// removing + insertion (2x O(n)) (likely large n), we should rotate+swap (O(n)).
if (m_list.size() == m_list.capacity()) {
m_list.erase(std::lower_bound(m_list.begin(), m_list.end(), m_queue.front()));
m_queue.pop();
}
auto insertPosition = std::lower_bound(m_list.cbegin(), m_list.cend(), value);
m_list.insert(insertPosition, value);
// we explicitly insert the value and not an index or iterator here,
// because those would get invalidated when the contents of m_list are
// shifted around (due to the erase and insert above). updating those
// iterators/indices is likely more expensive than recovering them when
// needed using the first std::lower_bound
m_queue.push(value);

m_bChanged = true;

// Insert new value
if (m_list.empty()) {
m_list.push_front(value);
m_queue.enqueue(m_list.begin());
} else if (value < m_list.front()) {
m_list.push_front(value);
m_queue.enqueue(m_list.begin());
} else if (value >= m_list.back()) {
m_list.push_back(value);
m_queue.enqueue(--m_list.end());
} else {
std::list<double>::iterator it = m_list.begin()++;
while (value >= *it) {
++it;
}
m_queue.enqueue(m_list.insert(it, value));
// (If value already exists in the list, the new instance
// is appended next to the old ones: 2·-> 1 2 3 = 1 2 2· 3)
}
DEBUG_ASSERT(std::is_sorted(m_list.cbegin(), m_list.cend()));

// If list was already full, delete the oldest value:
if (m_list.size() == static_cast<std::size_t>(m_iListMaxSize + 1)) {
m_list.erase(m_queue.dequeue());
}
return mean();
}

void MovingInterquartileMean::clear() {
m_bChanged = true;
m_queue.clear();
// std::queue has no .clear(), so creating a temporary and std::swap is the
// next most elegant solution
std::queue<double> empty;
std::swap(m_queue, empty);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't
std::queue<double>().swap(m_queue);
work too? Not tested.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch, yeah that should work.

m_list.clear();
}

Expand All @@ -49,47 +47,36 @@ double MovingInterquartileMean::mean() {
return m_dMean;
}

m_bChanged = false;
const int listSize = size();
auto simpleMean = [](auto begin, auto end) -> double {
double size = std::distance(begin, end);
return std::accumulate(begin, end, 0.0) / size;
};

const auto listSize = m_list.size();
if (listSize <= 4) {
double d_sum = 0;
for (const double d : std::as_const(m_list)) {
d_sum += d;
}
m_dMean = d_sum / listSize;
m_dMean = simpleMean(m_list.cbegin(), m_list.cend());
} else if (listSize % 4 == 0) {
int quartileSize = listSize / 4;
double interQuartileRange = 2 * quartileSize;
double d_sum = 0;
std::list<double>::iterator it = m_list.begin();
std::advance(it, quartileSize);
for (int k = 0; k < 2 * quartileSize; ++k, ++it) {
d_sum += *it;
}
m_dMean = d_sum / interQuartileRange;
std::size_t quartileSize = listSize / 4;
auto start = m_list.cbegin() + quartileSize;
auto end = m_list.cend() - quartileSize;
m_dMean = simpleMean(start, end);
} else {
// http://en.wikipedia.org/wiki/Interquartile_mean#Dataset_not_divisible_by_four
double quartileSize = listSize / 4.0;
double interQuartileRange = 2 * quartileSize;
int nFullValues = listSize - 2 * static_cast<int>(quartileSize) - 2;
std::size_t nFullValues = listSize - 2 * static_cast<std::size_t>(quartileSize) - 2;
double quartileWeight = (interQuartileRange - nFullValues) / 2;
std::list<double>::iterator it = m_list.begin();
std::advance(it, static_cast<int>(quartileSize));
auto it = m_list.begin();
std::advance(it, static_cast<std::size_t>(quartileSize));
double d_sum = *it * quartileWeight;
++it;
for (int k = 0; k < nFullValues; ++k, ++it) {
for (std::size_t k = 0; k < nFullValues; ++k, ++it) {
d_sum += *it;
}
d_sum += *it * quartileWeight;
m_dMean = d_sum / interQuartileRange;
}
return m_dMean;
}

int MovingInterquartileMean::size() const {
return static_cast<int>(m_list.size());
}
m_bChanged = false;

int MovingInterquartileMean::listMaxSize() const {
return m_iListMaxSize;
return m_dMean;
}
29 changes: 16 additions & 13 deletions src/util/movinginterquartilemean.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once

#include <QQueue>
#include <list>
#include <queue>
#include <vector>

// Truncated Interquartile mean

Expand All @@ -13,8 +13,11 @@
class MovingInterquartileMean {
public:
// Constructs an empty MovingTruncatedIQM.
MovingInterquartileMean(const unsigned int listLength);
virtual ~MovingInterquartileMean();
MovingInterquartileMean(std::size_t listLength)
: m_dMean(0.0),
m_bChanged(true) {
m_list.reserve(listLength);
}

// Inserts value to the list and returns the new truncated mean.
double insert(double value);
Expand All @@ -23,18 +26,18 @@ class MovingInterquartileMean {
// Returns the current truncated mean. Input list must not be empty.
double mean();
// Returns how many values have been input.
int size() const;
// Returns the maximum size of the input list.
int listMaxSize() const;
int size() const {
return static_cast<int>(m_list.size());
}

private:
double m_dMean;
int m_iListMaxSize;
// The list keeps input doubles ordered by value.
std::list<double> m_list;
// The queue keeps pointers to doubles in the list ordered
// by the order they were received.
QQueue<std::list<double>::iterator> m_queue;
std::vector<double> m_list;
// The queue keeps a second copy of the list, but in insertion
// order. This is to track which value we need to evict in order
// not stay within memory constraints.
std::queue<double> m_queue;
double m_dMean;

// sum() checks this to know if it has to recalculate the mean.
bool m_bChanged;
Expand Down
Loading