[LLVM] Add random seed support #2297

Merged: 18 commits, May 3, 2021
5 changes: 5 additions & 0 deletions docs/arithmetics.rst
@@ -129,6 +129,11 @@ Random number generator

Generates a random floating point number from the standard normal distribution.

.. note::

On **CPU** and **CUDA** backends, use the ``random_seed`` argument in ``ti.init()`` to specify the integer seed for random number generation.
The random seed is 0 by default.

Element-wise arithmetics for vectors and matrices
-------------------------------------------------

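For context, a minimal sketch of the usage the note above documents, assuming the CPU backend (the field shape and seed value are illustrative, not from this PR):

```python
import taichi as ti

# Seed the runtime's random states at startup (CPU/CUDA backends only).
ti.init(arch=ti.cpu, random_seed=42)

x = ti.field(ti.f32, shape=4)

@ti.kernel
def fill():
    for i in x:
        x[i] = ti.random()  # uniform sample in [0, 1)

fill()
# With a fixed seed, rerunning this script reproduces the same values.
print(x.to_numpy())
```
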
1 change: 1 addition & 0 deletions docs/global_settings.rst
@@ -25,6 +25,7 @@ Runtime
- Restart the entire Taichi system (destroy all fields and kernels): ``ti.reset()``.
- To start program in debug mode: ``ti.init(debug=True)`` or ``ti debug your_script.py``.
- To disable importing torch on start up: ``export TI_ENABLE_TORCH=0``.
- To change the random seed for random number generation: ``ti.init(random_seed=42)`` (effective on CPU and CUDA backends only).

Logging
*******
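
As a companion to the setting added above, a hedged sketch of checking the reproducibility it promises; the `run_once` helper is hypothetical, and the pattern mirrors the new test at the bottom of this PR (which instead checks that *different* seeds diverge):

```python
import numpy as np
import taichi as ti

def run_once(seed: int) -> np.ndarray:
    ti.init(arch=ti.cpu, random_seed=seed)
    x = ti.field(ti.f32, shape=8)

    @ti.kernel
    def fill():
        for i in x:
            x[i] = ti.random()

    fill()
    out = x.to_numpy()
    ti.reset()  # tear down so the next init starts fresh
    return out

# Same seed twice -> identical sequences; a different seed diverges.
assert np.allclose(run_once(42), run_once(42))
assert not np.allclose(run_once(42), run_once(7))
```
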
2 changes: 1 addition & 1 deletion misc/links.md
@@ -11,4 +11,4 @@
- [Taichi GLSL](https://github.com/taichi-dev/taichi_glsl): A Taichi extension library that provides a set of GLSL-style helper functions.
- [Taichi Blend](https://github.com/taichi-dev/taichi_blend): Taichi Blender integration for physics-based animations (work in progress)
- [Taichi.js](https://github.com/taichi-dev/taichi.js): Run compiled Taichi programs in Javascript and WASM (work in progress).
- [Shadertoy in Taichi](https://github.com/Phonicavi/Shadertoy-taichi): Some shadertoy examples implemented in Taichi, by [Qiu Feng (Phonicavi)](https://github.com/Phonicavi).
- [Shadertoy in Taichi](https://github.com/Phonicavi/Shadertoy-taichi): Some shadertoy examples implemented in Taichi, by [Qiu Feng (Phonicavi)](https://github.com/Phonicavi).
1 change: 1 addition & 0 deletions taichi/program/compile_config.cpp
@@ -42,6 +42,7 @@ CompileConfig::CompileConfig() {
saturating_grid_dim = 0;
max_block_dim = 0;
cpu_max_num_threads = std::thread::hardware_concurrency();
random_seed = 0;

ad_stack_size = 16;

1 change: 1 addition & 0 deletions taichi/program/compile_config.h
@@ -46,6 +46,7 @@ struct CompileConfig {
int saturating_grid_dim;
int max_block_dim;
int cpu_max_num_threads;
int random_seed;

// LLVM backend options:
bool print_struct_llvm_ir;
17 changes: 11 additions & 6 deletions taichi/program/program.cpp
@@ -307,6 +307,11 @@ void Program::initialize_runtime_system(StructCompiler *scomp) {
auto snodes = scomp->snodes;
int root_id = snode_root->id;

// Starting random state for the program calculated using the random seed.
// The seed is multiplied by 2^20 so that two programs with different seeds
// will not have overlapping random states in any thread.
int starting_rand_state = config.random_seed * 1048576;

// Number of random states. One per CPU/CUDA thread.
int num_rand_states = 0;

@@ -325,12 +330,12 @@
TI_TRACE("Allocating data structure of size {} B", scomp->root_size);
TI_TRACE("Allocating {} random states (used by CUDA only)", num_rand_states);

runtime->call<void *, void *, std::size_t, std::size_t, void *, int, void *,
void *, void *>("runtime_initialize", result_buffer, this,
(std::size_t)scomp->root_size, prealloc_size,
preallocated_device_buffer, num_rand_states,
(void *)&taichi_allocate_aligned,
(void *)std::printf, (void *)std::vsnprintf);
runtime->call<void *, void *, std::size_t, std::size_t, void *, int, int,
void *, void *, void *>(
"runtime_initialize", result_buffer, this, (std::size_t)scomp->root_size,
prealloc_size, preallocated_device_buffer, starting_rand_state,
num_rand_states, (void *)&taichi_allocate_aligned, (void *)std::printf,
(void *)std::vsnprintf);

TI_TRACE("LLVMRuntime initialized");
llvm_runtime = fetch_result<void *>(taichi_result_buffer_ret_value_id);
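
To illustrate the comment in the hunk above: the 2^20 stride keeps the state IDs of two programs disjoint as long as a single program allocates fewer than 2^20 = 1,048,576 random states. A small sketch of that arithmetic (illustrative Python with made-up names, not the runtime code):

```python
SEED_STRIDE = 1 << 20  # 1048576, the multiplier used above

def rand_state_ids(seed: int, num_rand_states: int) -> range:
    # IDs handed to initialize_rand_state(), one per CPU/CUDA thread.
    start = seed * SEED_STRIDE
    return range(start, start + num_rand_states)

# Programs with different seeds never share a state ID while
# num_rand_states < SEED_STRIDE.
assert set(rand_state_ids(0, 4096)).isdisjoint(rand_state_ids(1, 4096))
```
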
53 changes: 24 additions & 29 deletions taichi/python/export_lang.cpp
@@ -95,9 +95,8 @@ void export_lang(py::module &m) {
.def(py::self == py::self)
.def("__hash__", &DataType::hash)
.def("to_string", &DataType::to_string)
.def(
"get_ptr", [](DataType *dtype) -> Type * { return *dtype; },
py::return_value_policy::reference)
.def("get_ptr", [](DataType *dtype) -> Type * { return *dtype; },
py::return_value_policy::reference)
.def(py::pickle(
[](const DataType &dt) {
// Note: this only works for primitive types, which is fine for now.
@@ -148,6 +147,7 @@ void export_lang(py::module &m) {
.def_readwrite("saturating_grid_dim", &CompileConfig::saturating_grid_dim)
.def_readwrite("max_block_dim", &CompileConfig::max_block_dim)
.def_readwrite("cpu_max_num_threads", &CompileConfig::cpu_max_num_threads)
.def_readwrite("random_seed", &CompileConfig::random_seed)
.def_readwrite("verbose_kernel_launches",
&CompileConfig::verbose_kernel_launches)
.def_readwrite("verbose", &CompileConfig::verbose)
@@ -195,10 +195,9 @@ void export_lang(py::module &m) {
m.def("reset_default_compile_config",
[&]() { default_compile_config = CompileConfig(); });

m.def(
"default_compile_config",
[&]() -> CompileConfig & { return default_compile_config; },
py::return_value_policy::reference);
m.def("default_compile_config",
[&]() -> CompileConfig & { return default_compile_config; },
py::return_value_policy::reference);

py::class_<Program>(m, "Program")
.def(py::init<>())
@@ -215,12 +214,11 @@
})
.def("print_memory_profiler_info", &Program::print_memory_profiler_info)
.def("finalize", &Program::finalize)
.def(
"get_root",
[&](Program *program) -> SNode * {
return program->snode_root.get();
},
py::return_value_policy::reference)
.def("get_root",
[&](Program *program) -> SNode * {
return program->snode_root.get();
},
py::return_value_policy::reference)
.def("get_total_compilation_time", &Program::get_total_compilation_time)
.def("print_snode_tree", &Program::print_snode_tree)
.def("get_snode_num_dynamically_allocated",
@@ -235,10 +233,9 @@
m.def("get_current_program", get_current_program,
py::return_value_policy::reference);

m.def(
"current_compile_config",
[&]() -> CompileConfig & { return get_current_program().config; },
py::return_value_policy::reference);
m.def("current_compile_config",
[&]() -> CompileConfig & { return get_current_program().config; },
py::return_value_policy::reference);

py::class_<Index>(m, "Index").def(py::init<int>());
py::class_<SNode>(m, "SNode")
@@ -273,10 +270,9 @@
.def("data_type", [](SNode *snode) { return snode->dt; })
.def("get_num_ch",
[](SNode *snode) -> int { return (int)snode->ch.size(); })
.def(
"get_ch",
[](SNode *snode, int i) -> SNode * { return snode->ch[i].get(); },
py::return_value_policy::reference)
.def("get_ch",
[](SNode *snode, int i) -> SNode * { return snode->ch[i].get(); },
py::return_value_policy::reference)
.def("lazy_grad",
[](SNode *snode) {
make_lazy_grad(snode,
@@ -376,14 +372,13 @@

py::class_<Stmt>(m, "Stmt");
py::class_<Program::KernelProxy>(m, "KernelProxy")
.def(
"define",
[](Program::KernelProxy *ker,
const std::function<void()> &func) -> Kernel & {
py::gil_scoped_release release;
return ker->def(func);
},
py::return_value_policy::reference);
.def("define",
[](Program::KernelProxy *ker,
const std::function<void()> &func) -> Kernel & {
py::gil_scoped_release release;
return ker->def(func);
},
py::return_value_policy::reference);

m.def("insert_deactivate", [](SNode *snode, const ExprGroup &indices) {
return Deactivate(snode, indices);
3 changes: 2 additions & 1 deletion taichi/runtime/llvm/runtime.cpp
@@ -833,6 +833,7 @@ void runtime_initialize(
std::size_t
preallocated_size, // Non-zero means use the preallocated buffer
Ptr preallocated_buffer,
i32 starting_rand_state,
i32 num_rand_states,
void *_vm_allocator,
void *_host_printf,
@@ -885,7 +886,7 @@
runtime->rand_states = (RandState *)runtime->allocate_aligned(
sizeof(RandState) * runtime->num_rand_states, taichi_page_size);
for (int i = 0; i < runtime->num_rand_states; i++)
initialize_rand_state(&runtime->rand_states[i], i);
initialize_rand_state(&runtime->rand_states[i], starting_rand_state + i);
}

void runtime_initialize2(LLVMRuntime *runtime, int root_id, int num_snodes) {
22 changes: 12 additions & 10 deletions taichi/transforms/auto_diff.cpp
@@ -192,16 +192,18 @@ class ReplaceLocalVarWithStacks : public BasicStmtVisitor {

void visit(AllocaStmt *alloc) override {
TI_ASSERT(alloc->width() == 1);
bool load_only =
irpass::analysis::gather_statements(alloc->parent, [&](Stmt *s) {
if (auto store = s->cast<LocalStoreStmt>())
return store->dest == alloc;
else if (auto atomic = s->cast<AtomicOpStmt>()) {
return atomic->dest == alloc;
} else {
return false;
}
}).empty();
bool load_only = irpass::analysis::gather_statements(
alloc->parent,
[&](Stmt *s) {
if (auto store = s->cast<LocalStoreStmt>())
return store->dest == alloc;
else if (auto atomic = s->cast<AtomicOpStmt>()) {
return atomic->dest == alloc;
} else {
return false;
}
})
.empty();
if (!load_only) {
auto dtype = alloc->ret_type;
auto stack_alloca = Stmt::make<AdStackAllocaStmt>(dtype, ad_stack_size);
21 changes: 21 additions & 0 deletions tests/python/test_random.py
@@ -107,6 +107,27 @@ def gen(i: ti.i32):
assert count <= n * 0.15


@ti.test(arch=[ti.cpu, ti.cuda])
def test_random_seed_per_program():
import numpy as np
n = 10
result = []
for s in [0, 1]:
ti.init(random_seed=s)
x = ti.field(ti.f32, shape=n)

@ti.kernel
def gen():
for i in x:
x[i] = ti.random()

gen()
result.append(x.to_numpy())
ti.reset()

assert not np.allclose(result[0], result[1])


@ti.test(arch=[ti.cpu, ti.cuda])
def test_random_f64():
'''