Skip to content

Commit

Permalink
Move mutable globals into a central structure
Browse files Browse the repository at this point in the history
This is a stab at an alternative to #7717, per discussion there. It doesn't necessarily have all the mutable globals moved here, but at least the ones we discussed. Better? Worse? What do you think?
  • Loading branch information
steven-johnson committed Jul 28, 2023
1 parent 649a224 commit e194a43
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 71 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ SOURCE_FILES = \
CodeGen_WebGPU_Dev.cpp \
CodeGen_X86.cpp \
CompilerLogger.cpp \
CompilerGlobals.cpp \
CPlusPlusMangle.cpp \
CSE.cpp \
Debug.cpp \
Expand Down Expand Up @@ -676,7 +677,7 @@ HEADER_FILES = \
CodeGen_PyTorch.h \
CodeGen_Targets.h \
CodeGen_WebGPU_Dev.h \
CompilerLogger.h \
CompilerGlobals.h \
ConciseCasts.h \
CPlusPlusMangle.h \
CSE.h \
Expand Down
3 changes: 3 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ set(HEADER_FILES
CodeGen_Targets.h
CodeGen_Vulkan_Dev.h
CodeGen_WebGPU_Dev.h
# No: we don't want this in Halide.h
# CompilerGlobals.h
CompilerLogger.h
ConciseCasts.h
CPlusPlusMangle.h
Expand Down Expand Up @@ -211,6 +213,7 @@ set(SOURCE_FILES
CodeGen_WebAssembly.cpp
CodeGen_WebGPU_Dev.cpp
CodeGen_X86.cpp
CompilerGlobals.cpp
CompilerLogger.cpp
CPlusPlusMangle.cpp
CSE.cpp
Expand Down
43 changes: 43 additions & 0 deletions src/CompilerGlobals.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#include "CompilerGlobals.h"

#include "Util.h"

namespace Halide {
namespace Internal {

namespace {

Globals the_real_globals;

size_t compute_stack_size() {
std::string stack_size = get_env_variable("HL_COMPILER_STACK_SIZE");
if (stack_size.empty()) {
return default_compiler_stack_size;
} else {
return std::stoi(stack_size);
}
}

} // namespace

Globals &globals() {
return the_real_globals;
}

Globals::Globals() {
reset();
}

void Globals::reset() {
random_float_counter = 0;
random_uint_counter = 0;
random_variable_counter = 0;
for (int i = 0; i < num_unique_name_counters; i++) {
unique_name_counters[i] = 0;
}
tick_stack.clear();
compiler_stack_size = compute_stack_size();
}

} // namespace Internal
} // namespace Halide
62 changes: 62 additions & 0 deletions src/CompilerGlobals.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#ifndef HALIDE_COMPILER_GLOBALS_H
#define HALIDE_COMPILER_GLOBALS_H

/** \file
* Contains a struct that defines all of the Compiler's global state.
*/

#include <atomic>
#include <chrono>
#include <string>

#include "Util.h"

namespace Halide {
namespace Internal {

/** This struct is designed to contain all of the *mutable* global data
* used by the Halide compiler. (Global data that is declared const must
* not go here.) */
struct Globals {
struct TickStackEntry {
std::chrono::time_point<std::chrono::high_resolution_clock> time;
std::string file;
int line;
};

// A counter to use in random_float() calls.
std::atomic<int> random_float_counter;

// A counter to use in random_uint() and random_int() calls.
std::atomic<int> random_uint_counter;

// A counter to use in tagging random variables.
// Note that this will be reset by Internal::reset_random_counters().
std::atomic<int> random_variable_counter;

// Counters used for the unique_name() utilities.
std::atomic<int> unique_name_counters[num_unique_name_counters];

// The stack for tic/toc utilities
std::vector<TickStackEntry> tick_stack;

// The amount of stack space to use for the compiler, in bytes.
size_t compiler_stack_size;

Globals();

void reset();

private:
Globals(const Globals &copy) = delete;
Globals &operator=(const Globals &) = delete;
Globals(Globals &&) = delete;
Globals &operator=(Globals &&) = delete;
};

Globals &globals();

} // namespace Internal
} // namespace Halide

#endif
8 changes: 3 additions & 5 deletions src/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <utility>

#include "CSE.h"
#include "CompilerGlobals.h"
#include "Func.h"
#include "Function.h"
#include "IR.h"
Expand Down Expand Up @@ -299,9 +300,6 @@ class FreezeFunctions : public IRGraphVisitor {
}
};

// A counter to use in tagging random variables
std::atomic<int> rand_counter{0};

} // namespace

Function::Function(const FunctionPtr &ptr)
Expand Down Expand Up @@ -542,7 +540,7 @@ void Function::define(const vector<string> &args, vector<Expr> values) {
}

// Tag calls to random() with the free vars
int tag = rand_counter++;
int tag = globals().random_variable_counter++;
vector<VarOrRVar> free_vars;
free_vars.reserve(args.size());
for (const auto &arg : args) {
Expand Down Expand Up @@ -752,7 +750,7 @@ void Function::define_update(const vector<Expr> &_args, vector<Expr> values) {
free_vars.emplace_back(RVar(check.reduction_domain, i));
}
}
int tag = rand_counter++;
int tag = globals().random_variable_counter++;
for (auto &arg : args) {
arg = lower_random(arg, free_vars, tag);
}
Expand Down
9 changes: 4 additions & 5 deletions src/IROperator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <utility>

#include "CSE.h"
#include "CompilerGlobals.h"
#include "Debug.h"
#include "Func.h"
#include "IREquality.h"
Expand Down Expand Up @@ -2595,8 +2596,7 @@ Expr mod_round_to_zero(Expr x, Expr y) {

Expr random_float(Expr seed) {
// Random floats get even IDs
static std::atomic<int> counter{0};
int id = (counter++) * 2;
int id = (globals().random_float_counter++) * 2;

std::vector<Expr> args;
if (seed.defined()) {
Expand All @@ -2615,8 +2615,7 @@ Expr random_float(Expr seed) {

Expr random_uint(Expr seed) {
// Random ints get odd IDs
static std::atomic<int> counter{0};
int id = (counter++) * 2 + 1;
int id = (globals().random_uint_counter++) * 2 + 1;

std::vector<Expr> args;
if (seed.defined()) {
Expand All @@ -2632,7 +2631,7 @@ Expr random_uint(Expr seed) {
}

Expr random_int(Expr seed) {
return cast<int32_t>(random_uint(std::move(seed)));
return reinterpret<int32_t>(random_uint(std::move(seed)));
}

Expr likely(Expr e) {
Expand Down
4 changes: 4 additions & 0 deletions src/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "CodeGen_C.h"
#include "CodeGen_Internal.h"
#include "CodeGen_PyTorch.h"
#include "CompilerGlobals.h"
#include "CompilerLogger.h"
#include "Debug.h"
#include "HexagonOffload.h"
Expand Down Expand Up @@ -813,6 +814,7 @@ void compile_multitarget(const std::string &fn_name,
// This would make the filename outputs more symmetrical (ie the same for n=1 as for n>1)
// but at the expense of breaking existing users. So for now, we're going to continue
// with the legacy treatment below:
globals().reset();
module_factory(fn_name, base_target).compile(output_files);
return;
}
Expand Down Expand Up @@ -877,6 +879,8 @@ void compile_multitarget(const std::string &fn_name,
// We always produce the runtime separately, so add NoRuntime explicitly.
Target sub_fn_target = target.with_feature(Target::NoRuntime);

// Ensure that each subtarget sees the same sequence of random numbers
globals().reset();
{
ScopedCompilerLogger activate(compiler_logger_factory, sub_fn_name, sub_fn_target);
Module sub_module = module_factory(sub_fn_name, sub_fn_target);
Expand Down
75 changes: 19 additions & 56 deletions src/Util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#endif

#include "Util.h"

#include "CompilerGlobals.h"
#include "Debug.h"
#include "Error.h"
#include "Introspection.h"
Expand Down Expand Up @@ -158,23 +160,10 @@ string running_program_name() {
}

namespace {
// We use 64K of memory to store unique counters for the purpose of
// making names unique. Using less memory increases the likelihood of
// hash collisions. This wouldn't break anything, but makes stmts
// slightly confusing to read because names that are actually unique
// will get suffixes that falsely hint that they are not.

const int num_unique_name_counters = (1 << 14);

// We want to init these to zero, but cannot use = {0} because that
// would invoke a (deleted) copy ctor. The default initialization for
// atomics doesn't guarantee any actual initialization. Fortunately
// this is a global, which is always zero-initialized.
std::atomic<int> unique_name_counters[num_unique_name_counters] = {};

int unique_count(size_t h) {
h = h & (num_unique_name_counters - 1);
return unique_name_counters[h]++;
return globals().unique_name_counters[h]++;
}
} // namespace

Expand Down Expand Up @@ -611,30 +600,18 @@ bool mul_with_overflow(int bits, int64_t a, int64_t b, int64_t *result) {
}
}

struct TickStackEntry {
std::chrono::time_point<std::chrono::high_resolution_clock> time;
string file;
int line;
};

namespace {

vector<TickStackEntry> tick_stack;

} // namespace

void halide_tic_impl(const char *file, int line) {
string f = file;
f = split_string(f, "/").back();
tick_stack.push_back({std::chrono::high_resolution_clock::now(), f, line});
globals().tick_stack.push_back({std::chrono::high_resolution_clock::now(), f, line});
}

void halide_toc_impl(const char *file, int line) {
auto t1 = tick_stack.back();
auto t1 = globals().tick_stack.back();
auto t2 = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = t2 - t1.time;
tick_stack.pop_back();
for (size_t i = 0; i < tick_stack.size(); i++) {
globals().tick_stack.pop_back();
for (size_t i = 0; i < globals().tick_stack.size(); i++) {
debug(1) << " ";
}
string f = file;
Expand Down Expand Up @@ -702,28 +679,12 @@ void WINAPI generic_fiber_entry_point(LPVOID argument) {

} // namespace Internal

namespace {

struct CompilerStackSize {
CompilerStackSize() {
std::string stack_size = Internal::get_env_variable("HL_COMPILER_STACK_SIZE");
if (stack_size.empty()) {
size = default_compiler_stack_size;
} else {
size = std::atoi(stack_size.c_str());
}
}
size_t size;
} stack_size;

} // namespace

void set_compiler_stack_size(size_t sz) {
stack_size.size = sz;
Internal::globals().compiler_stack_size = sz;
}

size_t get_compiler_stack_size() {
return stack_size.size;
return Internal::globals().compiler_stack_size;
}

namespace Internal {
Expand Down Expand Up @@ -751,7 +712,9 @@ thread_local void *run_with_large_stack_arg = nullptr;
#endif

void run_with_large_stack(const std::function<void()> &action) {
if (stack_size.size == 0) {
const size_t compiler_stack_size = Internal::globals().compiler_stack_size;

if (compiler_stack_size == 0) {
// User has requested no stack swapping
action();
return;
Expand All @@ -765,16 +728,16 @@ void run_with_large_stack(const std::function<void()> &action) {
GetCurrentThreadStackLimits(&stack_low, &stack_high);
ptrdiff_t stack_remaining = (char *)&approx_stack_pos - (char *)stack_low;

if (stack_remaining < stack_size.size) {
debug(1) << "Insufficient stack space (" << stack_remaining << " bytes). Switching to fiber with " << stack_size.size << "-byte stack.\n";
if (stack_remaining < compiler_stack_size) {
debug(1) << "Insufficient stack space (" << stack_remaining << " bytes). Switching to fiber with " << compiler_stack_size << "-byte stack.\n";

auto was_a_fiber = IsThreadAFiber();

auto *main_fiber = was_a_fiber ? GetCurrentFiber() : ConvertThreadToFiber(nullptr);
internal_assert(main_fiber) << "ConvertThreadToFiber failed with code: " << GetLastError() << "\n";

GenericFiberArgs fiber_args{action, main_fiber};
auto *lower_fiber = CreateFiber(stack_size.size, generic_fiber_entry_point, &fiber_args);
auto *lower_fiber = CreateFiber(compiler_stack_size, generic_fiber_entry_point, &fiber_args);
internal_assert(lower_fiber) << "CreateFiber failed with code: " << GetLastError() << "\n";

SwitchToFiber(lower_fiber);
Expand Down Expand Up @@ -838,17 +801,17 @@ void run_with_large_stack(const std::function<void()> &action) {
// stack frames - 64k.
const size_t guard_band = 64 * 1024;

void *stack = mmap(nullptr, stack_size.size + guard_band, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
void *stack = mmap(nullptr, compiler_stack_size + guard_band, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
internal_assert(stack) << "mmap failed with error " << strerror(errno);

int err = mprotect((char *)stack + stack_size.size, guard_band, PROT_NONE);
int err = mprotect((char *)stack + compiler_stack_size, guard_band, PROT_NONE);
internal_assert(err == 0) << "mprotect failed with error " << strerror(errno);

err = getcontext(&context);
internal_assert(err == 0) << "getcontext failed with error " << strerror(errno);

context.uc_stack.ss_sp = stack;
context.uc_stack.ss_size = stack_size.size;
context.uc_stack.ss_size = compiler_stack_size;
context.uc_stack.ss_flags = 0;
context.uc_link = &calling_context;

Expand All @@ -858,7 +821,7 @@ void run_with_large_stack(const std::function<void()> &action) {
err = swapcontext(&calling_context, &context);
internal_assert(err == 0) << "swapcontext failed with error " << strerror(errno);

err = munmap(stack, stack_size.size + guard_band);
err = munmap(stack, compiler_stack_size + guard_band);
internal_assert(err == 0) << "munmap failed with error " << strerror(errno);

#ifdef HALIDE_WITH_EXCEPTIONS
Expand Down
Loading

0 comments on commit e194a43

Please sign in to comment.