Skip to content

Commit

Permalink
new thread pool
Browse files Browse the repository at this point in the history
  • Loading branch information
wkjarosz committed Mar 1, 2024
1 parent 9b6c7de commit 94ba0b8
Show file tree
Hide file tree
Showing 7 changed files with 1,017 additions and 71 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ CPMAddPackage(
GIT_TAG 71bffa3d7d89e46a37108732f5e63d554c10e018
OPTIONS ${OPENEXR_OPTIONS}
)
if(openexr_ADDED)
message(STATUS "openexr library added")
endif()

# # this is part of Dear ImGui Bundle, so no need to add it separately
# CPMAddPackage("gh:epezent/implot#f156599faefe316f7dd20fe6c783bf87c8bb6fd9") if(implot_ADDED) message(STATUS "implot
Expand Down Expand Up @@ -337,6 +340,7 @@ imgui_bundle_add_app(
src/parallelfor.cpp
src/progress.cpp
src/opengl_check.cpp
src/scheduler.cpp
src/shader.cpp
src/shader_gl.cpp
src/renderpass_gl.cpp
Expand Down
166 changes: 120 additions & 46 deletions src/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@

using namespace std;

//
// static local functions
//

//
// static methods and member definitions
//
Expand Down Expand Up @@ -74,21 +78,6 @@ std::set<std::string> Image::savable_formats()
// end static methods
//

void PixelStats::set_invalid()
{
exposure = 0.f;
minimum = 0.f;
maximum = 1.f;
average = 0.5f;
hist_x_scale = AxisScale_Linear;
hist_y_scale = AxisScale_Linear;
hist_y_limits = {0.f, 1.f};
hist_normalization = {0.f, 1.f};
nan_pixels = 0;
inf_pixels = 0;
computed = false;
}

float2 PixelStats::x_limits(float e, AxisScale_ scale) const
{
bool LDR_scale = scale == AxisScale_Linear || scale == AxisScale_SRGB;
Expand All @@ -108,54 +97,129 @@ float2 PixelStats::x_limits(float e, AxisScale_ scale) const
return ret;
}

PixelStats::PixelStats(const Array2Df &img, float the_exposure, AxisScale_ x_scale, AxisScale_ y_scale,
AtomicProgress &prog) :
exposure(the_exposure),
minimum(std::numeric_limits<float>::infinity()), maximum(-std::numeric_limits<float>::infinity()), average(0.f),
hist_x_scale{x_scale}, hist_y_scale{y_scale}
// void PixelStats::reset(float new_exposure, AxisScale_ new_x_scale, AxisScale_ new_y_scale)
// {
// *this = PixelStats();
// exposure = new_exposure;
// // minimum = std::numeric_limits<float>::infinity();
// // maximum = -std::numeric_limits<float>::infinity();
// // average = 0.0f;
// hist_x_scale = new_x_scale;
// hist_y_scale = new_y_scale;
// // hist_y_limits = {0.f, 1.f};
// // hist_normalization = {0.f, 1.f};
// // nan_pixels = 0;
// // inf_pixels = 0;
// // hist_xs.fill(0.f);
// // hist_ys.fill(0.f);
// // computed = false;
// }

void PixelStats::calculate(const Array2Df &img, float new_exposure, AxisScale_ new_x_scale, AxisScale_ new_y_scale,
AtomicProgress &prog)
{
try
{
spdlog::trace("Computing pixel statistics");
spdlog::debug("Num threads in scheduler {}", Scheduler::singleton()->getNumThreads());

// initialize values
*this = PixelStats();
exposure = new_exposure;
hist_x_scale = new_x_scale;
hist_y_scale = new_y_scale;

//
// compute pixel summary statistics
int valid_pixels = 0;
double accum = 0.0; // reduce numerical precision issues by accumulating in double
for (int i = 0; i < img.num_elements(); ++i)

Timer timer;
{
if (prog.canceled())
throw std::exception();
constexpr int numThreads = 4;
struct Stats
{
float minimum = std::numeric_limits<float>::infinity();
float maximum = -std::numeric_limits<float>::infinity();
double average = 0.0f;
int nan_pixels = 0;
int inf_pixels = 0;
int valid_pixels = 0;
bool exception = false;
};
std::array<Stats, max(1, numThreads)> partials;

parallel_for(
blocked_range<int>(0, img.num_elements(), 1024),
[&img, &partials, &prog](int begin, int end, int unit_index, int thread_index)
{
Stats partial = partials[unit_index]; //< compute over local symbols.
if (partial.exception)
return;

try
{
for (int i = begin; i != end; ++i)
{
if (prog.canceled())
throw std::exception();

// computation
float val = img(i);

if (isnan(val))
++partial.nan_pixels;
else if (isinf(val))
++partial.inf_pixels;
else
{
++partial.valid_pixels;
partial.maximum = std::max(partial.maximum, val);
partial.minimum = std::min(partial.minimum, val);
partial.average += val;
}
}
}
catch (...)
{
partial.exception = true;
}

partials[unit_index] = partial; //< Store partials at the end.
},
numThreads);

// final reduction from partial results
double accum = 0.f;
int valid_pixels = 0;
for (auto &p : partials)
{
if (p.exception)
throw std::exception();

float val = img(i);
minimum = std::min(p.minimum, minimum);
maximum = std::max(p.maximum, maximum);
nan_pixels += p.nan_pixels;
inf_pixels += p.inf_pixels;

if (isnan(val))
++nan_pixels;
else if (isinf(val))
++inf_pixels;
else
{
++valid_pixels;
maximum = std::max(maximum, val);
minimum = std::min(minimum, val);
accum += val;
valid_pixels += p.valid_pixels;
accum += p.average;
}
average = valid_pixels ? float(accum / valid_pixels) : 0.f;
}
average = valid_pixels ? float(accum / valid_pixels) : 0.f;

spdlog::trace("Min: {}\nMean: {}\nMax: {}", minimum, average, maximum);
spdlog::trace("Summary stats computed in {} ms:\nMin: {}\nMean: {}\nMax: {}", timer.elapsed(), minimum, average,
maximum);
//

//
// compute histograms

bool LDR_scale = x_scale == AxisScale_Linear || x_scale == AxisScale_SRGB;
bool LDR_scale = hist_x_scale == AxisScale_Linear || hist_x_scale == AxisScale_SRGB;

auto hist_x_limits = x_limits(exposure, x_scale);
auto hist_x_limits = x_limits(exposure, hist_x_scale);

hist_normalization[0] = axis_scale_fwd_xform(LDR_scale ? hist_x_limits[0] : minimum, &x_scale);
hist_normalization[0] = axis_scale_fwd_xform(LDR_scale ? hist_x_limits[0] : minimum, &hist_x_scale);
hist_normalization[1] =
axis_scale_fwd_xform(LDR_scale ? hist_x_limits[1] : maximum, &x_scale) - hist_normalization[0];
axis_scale_fwd_xform(LDR_scale ? hist_x_limits[1] : maximum, &hist_x_scale) - hist_normalization[0];

// compute bin center values
for (int i = 0; i < NUM_BINS; ++i) hist_xs[i] = bin_to_value(i + 0.5);
Expand All @@ -182,7 +246,7 @@ PixelStats::PixelStats(const Array2Df &img, float the_exposure, AxisScale_ x_sca
auto idx = ys.size() - 10;
std::nth_element(ys.begin(), ys.begin() + idx, ys.end());
// for logarithmic y-axis, we need a non-zero lower y-limit, so use half the smallest possible value
hist_y_limits[0] = y_scale == AxisScale_Linear ? 0.f : hist_y_limits[0]; // / 2.f;
hist_y_limits[0] = hist_y_scale == AxisScale_Linear ? 0.f : hist_y_limits[0]; // / 2.f;
// for upper y-limit, use the 10th largest value if its non-zero, then the largest, then just 1
if (ys[idx] != 0.f)
hist_y_limits[1] = ys[idx] * 1.15f;
Expand All @@ -198,7 +262,7 @@ PixelStats::PixelStats(const Array2Df &img, float the_exposure, AxisScale_ x_sca
catch (const std::exception &e)
{
spdlog::trace("Canceling pixel stats computation");
set_invalid();
*this = PixelStats(); // reset
}
}

Expand Down Expand Up @@ -288,9 +352,19 @@ void Channel::update_stats()
async_stats = PixelStats::Task(
[this, desired_settings](AtomicProgress &prog)
{
return make_shared<PixelStats>(*this, desired_settings.exposure, desired_settings.x_scale,
desired_settings.y_scale, prog);
auto ret = make_shared<PixelStats>();
ret->calculate(*this, desired_settings.exposure, desired_settings.x_scale, desired_settings.y_scale,
prog);
return ret;
});

// async_tracker = do_async(
// [this, desired_settings](AtomicProgress &prog)
// {
// async_stats2 = make_shared<PixelStats>(*this, desired_settings.exposure, desired_settings.x_scale,
// desired_settings.y_scale, prog);
// },
// progress);
async_settings = desired_settings;
async_stats.compute();
};
Expand Down
27 changes: 16 additions & 11 deletions src/image.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "colorspace.h"
#include "common.h"
#include "fwd.h"
#include "scheduler.h"
#include <cfloat>
#include <map>
#include <set>
Expand Down Expand Up @@ -67,28 +68,28 @@ struct PixelStats
};

float exposure = 0.f;
float minimum = 0.f;
float maximum = 1.f;
float minimum = std::numeric_limits<float>::infinity();
float maximum = -std::numeric_limits<float>::infinity();
float average = 0.0f;
int nan_pixels = 0;
int inf_pixels = 0;
bool computed = false; ///< Did we finish computing the stats?

bool computed = false; ///< Did we finish computing the stats?

// histogram
AxisScale_ hist_x_scale = AxisScale_Linear;
AxisScale_ hist_y_scale = AxisScale_Linear;
float2 hist_y_limits = {0.f, 1.f};
float2 hist_normalization = {0.f, 1.f};

float2 x_limits(float exposure, AxisScale_ x_scale) const;

std::array<float, NUM_BINS> hist_xs{};
std::array<float, NUM_BINS> hist_ys{};
std::array<float, NUM_BINS> hist_xs{}; // {}: value-initialized to zeros
std::array<float, NUM_BINS> hist_ys{}; // {}: value-initialized to zeros

PixelStats() = default;
PixelStats(const Array2Df &img, float new_exposure, AxisScale_ x_scale, AxisScale_ y_scale, AtomicProgress &prog);

void set_invalid();
/// Populate the statistics from the provided img and settings
void calculate(const Array2Df &img, float exposure, AxisScale_ x_scale, AxisScale_ y_scale, AtomicProgress &prog);

Settings settings() const { return {exposure, hist_x_scale, hist_y_scale}; }

int clamp_idx(int i) const { return std::clamp(i, 0, NUM_BINS - 1); }
Expand All @@ -107,6 +108,8 @@ struct PixelStats
return axis_scale_inv_xform(hist_normalization[1] * value * inv_bins + hist_normalization[0],
(void *)&hist_x_scale);
}

float2 x_limits(float exposure, AxisScale_ x_scale) const;
};

struct Channel : public Array2Df
Expand All @@ -130,8 +133,10 @@ struct Channel : public Array2Df
void update_stats();

private:
PixelStats::Ptr cached_stats;
PixelStats::Task async_stats;
PixelStats::Ptr cached_stats;
PixelStats::Task async_stats;
// Scheduler::TaskTracker async_tracker;
// PixelStats::Ptr async_stats2;
PixelStats::Settings async_settings{};
// bool stats_dirty = true;
};
Expand Down
12 changes: 6 additions & 6 deletions src/parallelfor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@ using namespace std;

// adapted from http://www.andythomason.com/2016/08/21/c-multithreading-an-effective-parallel-for-loop/
// license unknown, presumed public domain
void parallel_for(int begin, int end, int step, function<void(int, size_t)> body, bool serial)
void parallel_for(int begin, int end, int step, function<void(int, size_t)> body, size_t num_threads)
{
atomic<int> nextIndex;
nextIndex = begin;

#if defined(__EMSCRIPTEN__)
// shouldn't use this simple async-based parallel_for with emscripten since, even if compiled with pthread support,
// the async would block on the mail thread, which is a no-no
serial = true;
num_threads = 1;
#endif

auto policy = serial ? std::launch::deferred : std::launch::async;
size_t numCPUs = thread::hardware_concurrency();
auto policy = num_threads == 1 ? std::launch::deferred : std::launch::async;
size_t numCPUs = num_threads == 0 ? thread::hardware_concurrency() : num_threads;
vector<future<void>> futures(numCPUs);
for (size_t cpu = 0; cpu != numCPUs; ++cpu)
{
Expand All @@ -44,8 +44,8 @@ void parallel_for(int begin, int end, int step, function<void(int, size_t)> body
for (auto &f : futures) f.get();
}

void parallel_for(int begin, int end, int step, function<void(int)> body, bool serial)
void parallel_for(int begin, int end, int step, function<void(int)> body, size_t num_threads)
{
parallel_for(
begin, end, step, [&body](int i, size_t) { body(i); }, serial);
begin, end, step, [&body](int i, size_t) { body(i); }, num_threads);
}
16 changes: 8 additions & 8 deletions src/parallelfor.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,25 @@
* @param begin The starting index of the for loop
* @param end One past the ending index of the for loop
* @param step How much to increment at each iteration when moving from begin to end
* @param body The body of the for loop as a lambda, taking two parameters: the iterator index in [begin,end), and the
* CPU number
* @param body The body of the for loop as a lambda, taking two parameters: the iterator index in [begin,end), and
* the CPU number
* @param serial Force the loop to execute in serial instead of parallel
*/
void parallel_for(int begin, int end, int step, std::function<void(int, size_t)> body, bool serial = false);
void parallel_for(int begin, int end, int step, std::function<void(int, size_t)> body, size_t num_threads = 0);

/*!
* @brief A version of the parallel_for accepting a body lambda that only takes the iterator index as a parameter
*/
void parallel_for(int begin, int end, int step, std::function<void(int)> body, bool serial = false);
void parallel_for(int begin, int end, int step, std::function<void(int)> body, size_t num_threads = 0);

// adapted from http://www.andythomason.com/2016/08/21/c-multithreading-an-effective-parallel-for-loop/
// license unknown, presumed public domain
inline void parallel_for(int begin, int end, std::function<void(int, size_t)> body, bool serial = false)
inline void parallel_for(int begin, int end, std::function<void(int, size_t)> body, size_t num_threads = 0)
{
parallel_for(begin, end, 1, body);
parallel_for(begin, end, 1, body, num_threads);
}

inline void parallel_for(int begin, int end, std::function<void(int)> body, bool serial = false)
inline void parallel_for(int begin, int end, std::function<void(int)> body, size_t num_threads = 0)
{
parallel_for(begin, end, 1, body, serial);
parallel_for(begin, end, 1, body, num_threads);
}
Loading

0 comments on commit 94ba0b8

Please sign in to comment.