diff --git a/docs/arithmetics.rst b/docs/arithmetics.rst index c9515f23c556b..d0a0ebdd4468b 100644 --- a/docs/arithmetics.rst +++ b/docs/arithmetics.rst @@ -129,6 +129,11 @@ Random number generator Generates a random floating point number from the standard normal distribution. +.. note:: + + On **CPU** and **CUDA** backends, use the ``random_seed`` argument in ``ti.init()`` to specify the integer seed for random number generation. + The random seed is 0 by default. + Element-wise arithmetics for vectors and matrices ------------------------------------------------- diff --git a/docs/global_settings.rst b/docs/global_settings.rst index e205815b08a40..ec2a821a74983 100644 --- a/docs/global_settings.rst +++ b/docs/global_settings.rst @@ -25,6 +25,7 @@ Runtime - Restart the entire Taichi system (destroy all fields and kernels): ``ti.reset()``. - To start program in debug mode: ``ti.init(debug=True)`` or ``ti debug your_script.py``. - To disable importing torch on start up: ``export TI_ENABLE_TORCH=0``. +- To change the random seed for random number generation: ``ti.init(random_seed=42)`` (effective on CPU and CUDA backends only). Logging ******* diff --git a/misc/links.md b/misc/links.md index c17c7fb0a1e58..e8c3ce2c04f78 100644 --- a/misc/links.md +++ b/misc/links.md @@ -11,4 +11,4 @@ - [Taichi GLSL](https://github.com/taichi-dev/taichi_glsl): A Taichi extension library that provides a set of GLSL-style helper functions. - [Taichi Blend](https://github.com/taichi-dev/taichi_blend): Taichi Blender intergration for physics-based animations (work in progress) - [Taichi.js](https://github.com/taichi-dev/taichi.js): Run compiled Taichi programs in Javascript and WASM (work in progress). -- [Shadertoy in Taichi](https://github.com/Phonicavi/Shadertoy-taichi): Some shadertoy examples implemented in Taichi, by [Qiu Feng (Phonicavi)](https://github.com/Phonicavi). 
\ No newline at end of file +- [Shadertoy in Taichi](https://github.com/Phonicavi/Shadertoy-taichi): Some shadertoy examples implemented in Taichi, by [Qiu Feng (Phonicavi)](https://github.com/Phonicavi). diff --git a/taichi/program/compile_config.cpp b/taichi/program/compile_config.cpp index b0bfe1a05388d..cdd4da23773e5 100644 --- a/taichi/program/compile_config.cpp +++ b/taichi/program/compile_config.cpp @@ -42,6 +42,7 @@ CompileConfig::CompileConfig() { saturating_grid_dim = 0; max_block_dim = 0; cpu_max_num_threads = std::thread::hardware_concurrency(); + random_seed = 0; ad_stack_size = 16; diff --git a/taichi/program/compile_config.h b/taichi/program/compile_config.h index b6330605359ec..e25aeb34ea927 100644 --- a/taichi/program/compile_config.h +++ b/taichi/program/compile_config.h @@ -46,6 +46,7 @@ struct CompileConfig { int saturating_grid_dim; int max_block_dim; int cpu_max_num_threads; + int random_seed; // LLVM backend options: bool print_struct_llvm_ir; diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index 76368ba93e982..cd02b673506bc 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -307,6 +307,11 @@ void Program::initialize_runtime_system(StructCompiler *scomp) { auto snodes = scomp->snodes; int root_id = snode_root->id; + // Starting random state for the program, derived from the random seed. + // The seed is scaled by 2^20 so that programs with different seeds get + // disjoint per-thread states (assuming at most 2^20 states, no overflow). + int starting_rand_state = config.random_seed * 1048576; + // Number of random states. One per CPU/CUDA thread. 
int num_rand_states = 0; @@ -325,12 +330,12 @@ void Program::initialize_runtime_system(StructCompiler *scomp) { TI_TRACE("Allocating data structure of size {} B", scomp->root_size); TI_TRACE("Allocating {} random states (used by CUDA only)", num_rand_states); - runtime->call("runtime_initialize", result_buffer, this, - (std::size_t)scomp->root_size, prealloc_size, - preallocated_device_buffer, num_rand_states, - (void *)&taichi_allocate_aligned, - (void *)std::printf, (void *)std::vsnprintf); + runtime->call( + "runtime_initialize", result_buffer, this, (std::size_t)scomp->root_size, + prealloc_size, preallocated_device_buffer, starting_rand_state, + num_rand_states, (void *)&taichi_allocate_aligned, (void *)std::printf, + (void *)std::vsnprintf); TI_TRACE("LLVMRuntime initialized"); llvm_runtime = fetch_result(taichi_result_buffer_ret_value_id); diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 81979203dc9c6..a6fea501b576b 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -95,9 +95,8 @@ void export_lang(py::module &m) { .def(py::self == py::self) .def("__hash__", &DataType::hash) .def("to_string", &DataType::to_string) - .def( - "get_ptr", [](DataType *dtype) -> Type * { return *dtype; }, - py::return_value_policy::reference) + .def("get_ptr", [](DataType *dtype) -> Type * { return *dtype; }, + py::return_value_policy::reference) .def(py::pickle( [](const DataType &dt) { // Note: this only works for primitive types, which is fine for now. 
@@ -148,6 +147,7 @@ void export_lang(py::module &m) { .def_readwrite("saturating_grid_dim", &CompileConfig::saturating_grid_dim) .def_readwrite("max_block_dim", &CompileConfig::max_block_dim) .def_readwrite("cpu_max_num_threads", &CompileConfig::cpu_max_num_threads) + .def_readwrite("random_seed", &CompileConfig::random_seed) .def_readwrite("verbose_kernel_launches", &CompileConfig::verbose_kernel_launches) .def_readwrite("verbose", &CompileConfig::verbose) @@ -195,10 +195,9 @@ void export_lang(py::module &m) { m.def("reset_default_compile_config", [&]() { default_compile_config = CompileConfig(); }); - m.def( - "default_compile_config", - [&]() -> CompileConfig & { return default_compile_config; }, - py::return_value_policy::reference); + m.def("default_compile_config", + [&]() -> CompileConfig & { return default_compile_config; }, + py::return_value_policy::reference); py::class_(m, "Program") .def(py::init<>()) @@ -215,12 +214,11 @@ void export_lang(py::module &m) { }) .def("print_memory_profiler_info", &Program::print_memory_profiler_info) .def("finalize", &Program::finalize) - .def( - "get_root", - [&](Program *program) -> SNode * { - return program->snode_root.get(); - }, - py::return_value_policy::reference) + .def("get_root", + [&](Program *program) -> SNode * { + return program->snode_root.get(); + }, + py::return_value_policy::reference) .def("get_total_compilation_time", &Program::get_total_compilation_time) .def("print_snode_tree", &Program::print_snode_tree) .def("get_snode_num_dynamically_allocated", @@ -235,10 +233,9 @@ void export_lang(py::module &m) { m.def("get_current_program", get_current_program, py::return_value_policy::reference); - m.def( - "current_compile_config", - [&]() -> CompileConfig & { return get_current_program().config; }, - py::return_value_policy::reference); + m.def("current_compile_config", + [&]() -> CompileConfig & { return get_current_program().config; }, + py::return_value_policy::reference); py::class_(m, 
"Index").def(py::init()); py::class_(m, "SNode") @@ -273,10 +270,9 @@ void export_lang(py::module &m) { .def("data_type", [](SNode *snode) { return snode->dt; }) .def("get_num_ch", [](SNode *snode) -> int { return (int)snode->ch.size(); }) - .def( - "get_ch", - [](SNode *snode, int i) -> SNode * { return snode->ch[i].get(); }, - py::return_value_policy::reference) + .def("get_ch", + [](SNode *snode, int i) -> SNode * { return snode->ch[i].get(); }, + py::return_value_policy::reference) .def("lazy_grad", [](SNode *snode) { make_lazy_grad(snode, @@ -376,14 +372,13 @@ void export_lang(py::module &m) { py::class_(m, "Stmt"); py::class_(m, "KernelProxy") - .def( - "define", - [](Program::KernelProxy *ker, - const std::function &func) -> Kernel & { - py::gil_scoped_release release; - return ker->def(func); - }, - py::return_value_policy::reference); + .def("define", + [](Program::KernelProxy *ker, + const std::function &func) -> Kernel & { + py::gil_scoped_release release; + return ker->def(func); + }, + py::return_value_policy::reference); m.def("insert_deactivate", [](SNode *snode, const ExprGroup &indices) { return Deactivate(snode, indices); diff --git a/taichi/runtime/llvm/runtime.cpp b/taichi/runtime/llvm/runtime.cpp index 59d43583b3633..8906d1d80ec44 100644 --- a/taichi/runtime/llvm/runtime.cpp +++ b/taichi/runtime/llvm/runtime.cpp @@ -833,6 +833,7 @@ void runtime_initialize( std::size_t preallocated_size, // Non-zero means use the preallocated buffer Ptr preallocated_buffer, + i32 starting_rand_state, i32 num_rand_states, void *_vm_allocator, void *_host_printf, @@ -885,7 +886,7 @@ void runtime_initialize( runtime->rand_states = (RandState *)runtime->allocate_aligned( sizeof(RandState) * runtime->num_rand_states, taichi_page_size); for (int i = 0; i < runtime->num_rand_states; i++) - initialize_rand_state(&runtime->rand_states[i], i); + initialize_rand_state(&runtime->rand_states[i], starting_rand_state + i); } void runtime_initialize2(LLVMRuntime *runtime, int 
root_id, int num_snodes) { diff --git a/taichi/transforms/auto_diff.cpp b/taichi/transforms/auto_diff.cpp index 04c5a4c00adfb..a3d7d81669360 100644 --- a/taichi/transforms/auto_diff.cpp +++ b/taichi/transforms/auto_diff.cpp @@ -192,16 +192,18 @@ class ReplaceLocalVarWithStacks : public BasicStmtVisitor { void visit(AllocaStmt *alloc) override { TI_ASSERT(alloc->width() == 1); - bool load_only = - irpass::analysis::gather_statements(alloc->parent, [&](Stmt *s) { - if (auto store = s->cast()) - return store->dest == alloc; - else if (auto atomic = s->cast()) { - return atomic->dest == alloc; - } else { - return false; - } - }).empty(); + bool load_only = irpass::analysis::gather_statements( + alloc->parent, + [&](Stmt *s) { + if (auto store = s->cast()) + return store->dest == alloc; + else if (auto atomic = s->cast()) { + return atomic->dest == alloc; + } else { + return false; + } + }) + .empty(); if (!load_only) { auto dtype = alloc->ret_type; auto stack_alloca = Stmt::make(dtype, ad_stack_size); diff --git a/tests/python/test_random.py b/tests/python/test_random.py index fc3015b73b62b..3b93f0ce4baf4 100644 --- a/tests/python/test_random.py +++ b/tests/python/test_random.py @@ -107,6 +107,27 @@ def gen(i: ti.i32): assert count <= n * 0.15 +@ti.test(arch=[ti.cpu, ti.cuda]) +def test_random_seed_per_program(): + import numpy as np + n = 10 + result = [] + for s in [0, 1]: + ti.init(random_seed=s) + x = ti.field(ti.f32, shape=n) + + @ti.kernel + def gen(): + for i in x: + x[i] = ti.random() + + gen() + result.append(x.to_numpy()) + ti.reset() + + assert not np.allclose(result[0], result[1]) + + @ti.test(arch=[ti.cpu, ti.cuda]) def test_random_f64(): '''