Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 1062 ode speedup #1066

Merged
merged 36 commits into from
May 10, 2019
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
877d5c3
tune access to AD stack during ODE integration of the coupled system
Nov 7, 2018
72922fe
make all tests happy
Nov 7, 2018
3709f55
recover adjoints of outer AD tree
Nov 17, 2018
f4046a7
more lazy cleanup
Nov 17, 2018
223885f
Merge remote-tracking branch 'origin/develop' into feature/issue-1062…
wds15 Jan 27, 2019
017948c
avoid re-allocation of constant parameter vector on nested AD stack b…
wds15 Jan 27, 2019
733f9af
avoid a few copies and add explicit move semantics where appropiate i…
wds15 Jan 27, 2019
2c8856f
move local theta copy to nonstack tape
wds15 Jan 28, 2019
9dbd0f9
add tests for constant stack size when instantiating coupled_ode_system
weberse2 Jan 29, 2019
4d22c0c
[Jenkins] auto-formatting by clang-format version 5.0.0-3~16.04.1 (ta…
stan-buildbot Jan 29, 2019
965ab13
Merge branch 'feature/issue-1062-ode-speedup' of https://github.com/s…
wds15 Feb 3, 2019
b8c64e7
rename theta_copy_ to theta_nochain_ and add doc for the special hand…
wds15 Feb 3, 2019
718804c
address review comments
wds15 Feb 4, 2019
f1d3c03
add a comment to explain "manual" adjoint zeroing
weberse2 Feb 21, 2019
2561e04
Merge commit 'd7dc42dee65a1f08ef9c2195ba940531e2e797a6' into HEAD
yashikno Feb 21, 2019
29b91dc
[Jenkins] auto-formatting by clang-format version 5.0.0-3~16.04.1 (ta…
stan-buildbot Feb 21, 2019
7715209
Merge branch 'feature/issue-1062-ode-speedup' of https://github.com/s…
weberse2 Feb 21, 2019
b365d53
ange design of speedup approach
wds15 Mar 2, 2019
f15c6ef
optimize reuse of theta adjoint in global AD tree
wds15 Mar 2, 2019
6c5beec
Merge branch 'feature/issue-1062-ode-speedup' of https://github.com/s…
wds15 Mar 2, 2019
86154f9
Merge remote-tracking branch 'origin/develop' into feature/issue-1062…
wds15 Mar 14, 2019
d4957de
add doc on backup-and-restore approach
wds15 Mar 14, 2019
0922a26
Merge branch 'feature/issue-1062-ode-speedup' of https://github.com/s…
weberse2 Apr 1, 2019
2a365db
put in approach which does not modify the outer AD tree; still needs …
weberse2 Apr 1, 2019
03c7a5e
Merge commit 'cb204e0bf0735a294c2689fc8bb3a4572a1becee' into HEAD
yashikno Apr 1, 2019
1e78847
[Jenkins] auto-formatting by clang-format version 5.0.0-3~16.04.1 (ta…
stan-buildbot Apr 1, 2019
b5053c1
Merge branch 'feature/issue-1062-ode-speedup' of https://github.com/s…
weberse2 Apr 2, 2019
83ca05d
Merge remote-tracking branch 'origin/develop' into feature/issue-1062…
wds15 May 5, 2019
e506db8
revert to the nochain approach
wds15 May 5, 2019
c0fb42c
doc special nochain speedup side-effects
wds15 May 5, 2019
f07855b
Merge branch 'feature/issue-1062-ode-speedup' of https://github.com/s…
wds15 May 5, 2019
2390317
Merge branch 'feature/issue-1062-ode-speedup' of https://github.com/s…
weberse2 May 9, 2019
20883c6
remove left-over outdated doc
weberse2 May 9, 2019
f71715f
address reviewer comments on test; make test-purpose for the size cle…
weberse2 May 9, 2019
cc72c41
Merge commit '57215ead04f95c25cb36b7b8f21776d0090cff55' into HEAD
yashikno May 9, 2019
fd7846f
[Jenkins] auto-formatting by clang-format version 5.0.0-3~16.04.1 (ta…
stan-buildbot May 9, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 36 additions & 23 deletions stan/math/rev/arr/functor/coupled_ode_system.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,23 @@ namespace math {
* <p>The final M states correspond to the sensitivities with respect
* to the second base system equation, etc.
*
* <p>Note: For efficiency reasons we place a deep copy of the theta_
* parameter vector on the nochain var stack as theta_nochain_. Doing
* so separates the parameter vector from it's own operands and allows
* to use the same parameter vector instance as input for the ODE RHS
* functor f_. Since the jacobian of the ODE RHS is constructed in a
* nested AD operation we have to reset the adjoints of the
* theta_nochain_ variables with an additional for loop after
* set_zero_all_adjoints_nested().
*
* @tparam F type of functor for the base ode system.
*/
template <typename F>
struct coupled_ode_system<F, double, var> {
const F& f_;
const std::vector<double>& y0_dbl_;
const std::vector<var>& theta_;
const std::vector<double> theta_dbl_;
std::vector<var> theta_nochain_;
const std::vector<double>& x_;
const std::vector<int>& x_int_;
const size_t N_;
Expand All @@ -72,13 +81,15 @@ struct coupled_ode_system<F, double, var> {
: f_(f),
y0_dbl_(y0),
theta_(theta),
theta_dbl_(value_of(theta)),
x_(x),
x_int_(x_int),
N_(y0.size()),
M_(theta.size()),
size_(N_ + N_ * M_),
msgs_(msgs) {}
msgs_(msgs) {
for (const var& p : theta)
theta_nochain_.emplace_back(var(new vari(p.val(), false)));
}

/**
* Assign the derivative vector with the system derivatives at
Expand All @@ -104,11 +115,9 @@ struct coupled_ode_system<F, double, var> {
try {
start_nested();

vector<var> y_vars(z.begin(), z.begin() + N_);

vector<var> theta_vars(theta_dbl_.begin(), theta_dbl_.end());
const vector<var> y_vars(z.begin(), z.begin() + N_);

vector<var> dy_dt_vars = f_(t, y_vars, theta_vars, x_, x_int_, msgs_);
vector<var> dy_dt_vars = f_(t, y_vars, theta_nochain_, x_, x_int_, msgs_);

check_size_match("coupled_ode_system", "dz_dt", dy_dt_vars.size(),
"states", N_);
Expand All @@ -121,7 +130,7 @@ struct coupled_ode_system<F, double, var> {
// orders derivatives by equation (i.e. if there are 2 eqns
// (y1, y2) and 2 parameters (a, b), dy_dt will be ordered as:
// dy1_dt, dy2_dt, dy1_da, dy2_da, dy1_db, dy2_db
double temp_deriv = theta_vars[j].adj();
double temp_deriv = theta_nochain_[j].adj();
const size_t offset = N_ + N_ * j;
for (size_t k = 0; k < N_; k++)
temp_deriv += z[offset + k] * y_vars[k].adj();
Expand All @@ -130,6 +139,8 @@ struct coupled_ode_system<F, double, var> {
}

set_zero_all_adjoints_nested();
for (size_t j = 0; j < M_; ++j)
syclik marked this conversation as resolved.
Show resolved Hide resolved
theta_nochain_[j].vi_->set_zero_adjoint();
}
} catch (const std::exception& e) {
recover_memory_nested();
Expand Down Expand Up @@ -222,7 +233,6 @@ template <typename F>
struct coupled_ode_system<F, var, double> {
const F& f_;
const std::vector<var>& y0_;
const std::vector<double> y0_dbl_;
const std::vector<double>& theta_dbl_;
const std::vector<double>& x_;
const std::vector<int>& x_int_;
Expand Down Expand Up @@ -250,7 +260,6 @@ struct coupled_ode_system<F, var, double> {
const std::vector<int>& x_int, std::ostream* msgs)
: f_(f),
y0_(y0),
y0_dbl_(value_of(y0)),
theta_dbl_(theta),
x_(x),
x_int_(x_int),
Expand Down Expand Up @@ -282,7 +291,7 @@ struct coupled_ode_system<F, var, double> {
try {
start_nested();

vector<var> y_vars(z.begin(), z.begin() + N_);
const vector<var> y_vars(z.begin(), z.begin() + N_);

vector<var> dy_dt_vars = f_(t, y_vars, theta_dbl_, x_, x_int_, msgs_);

Expand Down Expand Up @@ -338,7 +347,7 @@ struct coupled_ode_system<F, var, double> {
std::vector<double> initial_state() const {
std::vector<double> initial(size_, 0.0);
for (size_t i = 0; i < N_; i++)
initial[i] = y0_dbl_[i];
initial[i] = value_of(y0_[i]);
syclik marked this conversation as resolved.
Show resolved Hide resolved
for (size_t i = 0; i < N_; i++)
initial[N_ + i * N_ + i] = 1.0;
return initial;
Expand Down Expand Up @@ -408,15 +417,18 @@ struct coupled_ode_system<F, var, double> {
* + M) states. (derivatives of each state with respect to each
* initial value and each theta)
*
* <p>Note: Please refer to the efficiency note of the
* coupled_ode_system<F, double, var> partial template specialization
* for the rationale behind theta_nochain_.
*
* @tparam F the functor for the base ode system
*/
template <typename F>
struct coupled_ode_system<F, var, var> {
const F& f_;
const std::vector<var>& y0_;
const std::vector<double> y0_dbl_;
const std::vector<var>& theta_;
const std::vector<double> theta_dbl_;
std::vector<var> theta_nochain_;
const std::vector<double>& x_;
const std::vector<int>& x_int_;
const size_t N_;
Expand All @@ -443,15 +455,16 @@ struct coupled_ode_system<F, var, var> {
const std::vector<int>& x_int, std::ostream* msgs)
: f_(f),
y0_(y0),
y0_dbl_(value_of(y0)),
theta_(theta),
theta_dbl_(value_of(theta)),
x_(x),
x_int_(x_int),
N_(y0.size()),
M_(theta.size()),
size_(N_ + N_ * (N_ + M_)),
msgs_(msgs) {}
msgs_(msgs) {
for (const var& p : theta)
theta_nochain_.emplace_back(var(new vari(p.val(), false)));
}

/**
* Populates the derivative vector with derivatives of the
Expand All @@ -476,11 +489,9 @@ struct coupled_ode_system<F, var, var> {
try {
start_nested();

vector<var> y_vars(z.begin(), z.begin() + N_);

vector<var> theta_vars(theta_dbl_.begin(), theta_dbl_.end());
const vector<var> y_vars(z.begin(), z.begin() + N_);

vector<var> dy_dt_vars = f_(t, y_vars, theta_vars, x_, x_int_, msgs_);
vector<var> dy_dt_vars = f_(t, y_vars, theta_nochain_, x_, x_int_, msgs_);

check_size_match("coupled_ode_system", "dz_dt", dy_dt_vars.size(),
"states", N_);
Expand All @@ -502,7 +513,7 @@ struct coupled_ode_system<F, var, var> {
}

for (size_t j = 0; j < M_; j++) {
double temp_deriv = theta_vars[j].adj();
double temp_deriv = theta_nochain_[j].adj();
const size_t offset = N_ + N_ * N_ + N_ * j;
for (size_t k = 0; k < N_; k++)
temp_deriv += z[offset + k] * y_vars[k].adj();
Expand All @@ -511,6 +522,8 @@ struct coupled_ode_system<F, var, var> {
}

set_zero_all_adjoints_nested();
for (size_t j = 0; j < M_; ++j)
theta_nochain_[j].vi_->set_zero_adjoint();
}
} catch (const std::exception& e) {
recover_memory_nested();
Expand Down Expand Up @@ -540,7 +553,7 @@ struct coupled_ode_system<F, var, var> {
std::vector<double> initial_state() const {
std::vector<double> initial(size_, 0.0);
for (size_t i = 0; i < N_; i++)
initial[i] = y0_dbl_[i];
initial[i] = value_of(y0_[i]);
for (size_t i = 0; i < N_; i++)
initial[N_ + i * N_ + i] = 1.0;
return initial;
Expand Down
2 changes: 1 addition & 1 deletion stan/math/rev/core/var.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ static void grad(vari* vi);
* Independent (input) and dependent (output) variables for gradients.
*
* This class acts as a smart pointer, with resources managed by
* an agenda-based memory manager scoped to a single gradient
* an arena-based memory manager scoped to a single gradient
syclik marked this conversation as resolved.
Show resolved Hide resolved
* calculation.
*
* An var is constructed with a double and used like any
Expand Down
15 changes: 7 additions & 8 deletions stan/math/rev/mat/functor/cvodes_ode_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,11 @@ class cvodes_ode_data {
*/
inline void rhs(double t, const double y[], double dy_dt[]) const {
const std::vector<double> y_vec(y, y + N_);
const std::vector<double> dy_dt_vec
const std::vector<double>& dy_dt_vec
= f_(t, y_vec, theta_dbl_, x_, x_int_, msgs_);
check_size_match("cvodes_ode_data", "dz_dt", dy_dt_vec.size(), "states",
N_);
std::copy(dy_dt_vec.begin(), dy_dt_vec.end(), dy_dt);
std::move(dy_dt_vec.begin(), dy_dt_vec.end(), dy_dt);
syclik marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand All @@ -165,14 +165,13 @@ class cvodes_ode_data {
* y to be the initial of the coupled ode system.
*/
inline int jacobian_states(double t, const double y[], SUNMatrix J) const {
const std::vector<double> y_vec(y, y + N_);
start_nested();
std::vector<var> y_vec_var(y_vec.begin(), y_vec.end());
const std::vector<var> y_vec_var(y, y + N_);
coupled_ode_system<F, var, double> ode_jacobian(f_, y_vec_var, theta_dbl_,
x_, x_int_, msgs_);
std::vector<double> jacobian_y(ode_jacobian.size(), 0);
std::vector<double>&& jacobian_y = std::vector<double>(ode_jacobian.size());
syclik marked this conversation as resolved.
Show resolved Hide resolved
ode_jacobian(ode_jacobian.initial_state(), jacobian_y, t);
std::copy(jacobian_y.begin() + N_, jacobian_y.end(), SM_DATA_D(J));
std::move(jacobian_y.begin() + N_, jacobian_y.end(), SM_DATA_D(J));
recover_memory_nested();
return 0;
}
Expand All @@ -186,14 +185,14 @@ class cvodes_ode_data {
inline void rhs_sens(double t, const double y[], N_Vector* yS,
N_Vector* ySdot) const {
std::vector<double> z(coupled_state_.size());
std::vector<double> dz_dt(coupled_state_.size());
std::vector<double>&& dz_dt = std::vector<double>(coupled_state_.size());
std::copy(y, y + N_, z.begin());
for (std::size_t s = 0; s < S_; s++)
std::copy(NV_DATA_S(yS[s]), NV_DATA_S(yS[s]) + N_,
z.begin() + (s + 1) * N_);
coupled_ode_(z, dz_dt, t);
for (std::size_t s = 0; s < S_; s++)
std::copy(dz_dt.begin() + (s + 1) * N_, dz_dt.begin() + (s + 2) * N_,
std::move(dz_dt.begin() + (s + 1) * N_, dz_dt.begin() + (s + 2) * N_,
NV_DATA_S(ySdot[s]));
}
};
Expand Down
25 changes: 25 additions & 0 deletions test/unit/math/rev/arr/functor/coupled_ode_system_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ struct StanAgradRevOde : public ::testing::Test {
TEST_F(StanAgradRevOde, coupled_ode_system_dv) {
using stan::math::coupled_ode_system;

stan::math::start_nested();

harm_osc_ode_fun harm_osc;

std::vector<stan::math::var> theta;
Expand All @@ -38,15 +40,21 @@ TEST_F(StanAgradRevOde, coupled_ode_system_dv) {
coupled_y0.push_back(1.0);
coupled_y0.push_back(2.0);

std::size_t stack_size = stan::math::nested_size();

coupled_ode_system<harm_osc_ode_fun, double, stan::math::var> system(
harm_osc, y0, theta, x, x_int, &msgs);

EXPECT_EQ(stan::math::nested_size(), stack_size);
syclik marked this conversation as resolved.
Show resolved Hide resolved

system(coupled_y0, dy_dt, t0);

EXPECT_FLOAT_EQ(0.5, dy_dt[0]);
EXPECT_FLOAT_EQ(-1.075, dy_dt[1]);
EXPECT_FLOAT_EQ(2, dy_dt[2]);
EXPECT_FLOAT_EQ(-1.8, dy_dt[3]);

stan::math::recover_memory_nested();
}
TEST_F(StanAgradRevOde, decouple_states_dv) {
using stan::math::coupled_ode_system;
Expand Down Expand Up @@ -187,6 +195,8 @@ TEST_F(StanAgradRevOde, memory_recovery_exception_dv) {
TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
using stan::math::coupled_ode_system;

stan::math::start_nested();

harm_osc_ode_fun harm_osc;

std::vector<double> theta;
Expand All @@ -211,9 +221,13 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
y0_var.push_back(1.0);
y0_var.push_back(0.5);

std::size_t stack_size = stan::math::nested_size();

coupled_ode_system<harm_osc_ode_fun, stan::math::var, double> system(
harm_osc, y0_var, theta, x, x_int, &msgs);

EXPECT_EQ(stan::math::nested_size(), stack_size);

system(coupled_y0, dy_dt, t0);

EXPECT_FLOAT_EQ(0.5, dy_dt[0]);
Expand All @@ -222,6 +236,8 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
EXPECT_FLOAT_EQ(-1.0 * 1.0 - 0.15 * 0.0, dy_dt[3]);
EXPECT_FLOAT_EQ(0.0 * 0.0 + 1.0 * 1.0, dy_dt[4]);
EXPECT_FLOAT_EQ(-1.0 * 0.0 - 0.15 * 1.0, dy_dt[5]);

stan::math::recover_memory_nested();
}
TEST_F(StanAgradRevOde, decouple_states_vd) {
using stan::math::coupled_ode_system;
Expand Down Expand Up @@ -362,6 +378,8 @@ TEST_F(StanAgradRevOde, memory_recovery_exception_vd) {
TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
using stan::math::coupled_ode_system;

stan::math::start_nested();

std::vector<stan::math::var> y0_var;
y0_var.push_back(1.0);
y0_var.push_back(0.5);
Expand All @@ -370,9 +388,14 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
theta_var.push_back(0.15);

harm_osc_ode_fun harm_osc;

std::size_t stack_size = stan::math::nested_size();

coupled_ode_system<harm_osc_ode_fun, stan::math::var, stan::math::var> system(
harm_osc, y0_var, theta_var, x, x_int, &msgs);

EXPECT_EQ(stan::math::nested_size(), stack_size);

std::vector<double> coupled_y0(8, 0);
coupled_y0[0] = 1.0;
coupled_y0[1] = 0.5;
Expand Down Expand Up @@ -403,6 +426,8 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
EXPECT_FLOAT_EQ(-0.15, dy_dt[5]);
EXPECT_FLOAT_EQ(0, dy_dt[6]);
EXPECT_FLOAT_EQ(-0.5, dy_dt[7]);

stan::math::recover_memory_nested();
}
TEST_F(StanAgradRevOde, decouple_states_vv) {
using stan::math::coupled_ode_system;
Expand Down