-
-
Notifications
You must be signed in to change notification settings - Fork 188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/issue 1062 ode speedup #1066
Changes from 21 commits
877d5c3
72922fe
3709f55
f4046a7
223885f
017948c
733f9af
2c8856f
9dbd0f9
4d22c0c
965ab13
b8c64e7
718804c
f1d3c03
2561e04
29b91dc
7715209
b365d53
f15c6ef
6c5beec
86154f9
d4957de
0922a26
2a365db
03c7a5e
1e78847
b5053c1
83ca05d
e506db8
c0fb42c
f07855b
2390317
20883c6
f71715f
cc72c41
fd7846f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,14 +38,32 @@ namespace math { | |
* <p>The final M states correspond to the sensitivities with respect | ||
* to the second base system equation, etc. | ||
* | ||
* <p>Note: For efficiency reasons the parameter vector is used as | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this doc doesn't describe the conditions clearly. Shouldn't this state that nothing else can touch the autodiff stack for the duration of this object? We need to start being careful about this because we're moving to parallel computation. (This particular condition requires dealing with this object with care and the user should be trying to limit the lifetime of the objects as much as possible.) |
||
* part of the nested autodiff performed when evaluating the Jacobian | ||
* with respect to the parameters of the ODE RHS. This links the nested | ||
* autodiff with the outer global autodiff stack. At construction of | ||
* the coupled_ode_system the adjoints of the parameter vector are | ||
* saved. Upon destruction of the instance, these adjoints are | ||
* restored to their original values at instantiation of the | ||
* class. Throughout the life-time of the coupled_ode_system the | ||
* adjoints of the parameter vector, which is part of the surrounding | ||
* global autodiff stack, are used and modified. Thus, concurrent | ||
* access to the outer autodiff stack is unsafe while a | ||
* coupled_ode_system instance is in use. | ||
* | ||
* Finally, since the parameter vector is part of the outer autodiff | ||
* stack, the set_zero_all_adjoints_nested() call does not set these | ||
* adjoints to zero, which is why they are zeroed in an extra loop | ||
* following the set_zero_all_adjoints_nested() call. | ||
* | ||
* @tparam F type of functor for the base ode system. | ||
*/ | ||
template <typename F> | ||
struct coupled_ode_system<F, double, var> { | ||
const F& f_; | ||
const std::vector<double>& y0_dbl_; | ||
const std::vector<var>& theta_; | ||
const std::vector<double> theta_dbl_; | ||
std::vector<double> theta_adj_; | ||
const std::vector<double>& x_; | ||
const std::vector<int>& x_int_; | ||
const size_t N_; | ||
|
@@ -72,13 +90,24 @@ struct coupled_ode_system<F, double, var> { | |
: f_(f), | ||
y0_dbl_(y0), | ||
theta_(theta), | ||
theta_dbl_(value_of(theta)), | ||
theta_adj_(theta.size()), | ||
x_(x), | ||
x_int_(x_int), | ||
N_(y0.size()), | ||
M_(theta.size()), | ||
size_(N_ + N_ * M_), | ||
msgs_(msgs) {} | ||
msgs_(msgs) { | ||
for (size_t j = 0; j < M_; ++j) { | ||
theta_adj_[j] = theta_[j].adj(); | ||
theta_[j].vi_->set_zero_adjoint(); | ||
} | ||
} | ||
|
||
~coupled_ode_system() { | ||
for (size_t j = 0; j < M_; ++j) { | ||
theta_[j].vi_->adj_ = theta_adj_[j]; | ||
} | ||
} | ||
|
||
/** | ||
* Assign the derivative vector with the system derivatives at | ||
|
@@ -104,11 +133,9 @@ struct coupled_ode_system<F, double, var> { | |
try { | ||
start_nested(); | ||
|
||
vector<var> y_vars(z.begin(), z.begin() + N_); | ||
const vector<var> y_vars(z.begin(), z.begin() + N_); | ||
|
||
vector<var> theta_vars(theta_dbl_.begin(), theta_dbl_.end()); | ||
|
||
vector<var> dy_dt_vars = f_(t, y_vars, theta_vars, x_, x_int_, msgs_); | ||
vector<var> dy_dt_vars = f_(t, y_vars, theta_, x_, x_int_, msgs_); | ||
|
||
check_size_match("coupled_ode_system", "dz_dt", dy_dt_vars.size(), | ||
"states", N_); | ||
|
@@ -121,7 +148,7 @@ struct coupled_ode_system<F, double, var> { | |
// orders derivatives by equation (i.e. if there are 2 eqns | ||
// (y1, y2) and 2 parameters (a, b), dy_dt will be ordered as: | ||
// dy1_dt, dy2_dt, dy1_da, dy2_da, dy1_db, dy2_db | ||
double temp_deriv = theta_vars[j].adj(); | ||
double temp_deriv = theta_[j].adj(); | ||
const size_t offset = N_ + N_ * j; | ||
for (size_t k = 0; k < N_; k++) | ||
temp_deriv += z[offset + k] * y_vars[k].adj(); | ||
|
@@ -130,11 +157,18 @@ struct coupled_ode_system<F, double, var> { | |
} | ||
|
||
set_zero_all_adjoints_nested(); | ||
// Parameters stored on the outer (non-nested) nochain stack are not | ||
// reset to zero by the last call. This is done as a separate step here. | ||
// See efficiency note above on template specialization for more details | ||
// on this. | ||
for (size_t j = 0; j < M_; ++j) | ||
syclik marked this conversation as resolved.
Show resolved
Hide resolved
|
||
theta_[j].vi_->set_zero_adjoint(); | ||
} | ||
} catch (const std::exception& e) { | ||
recover_memory_nested(); | ||
throw; | ||
} | ||
|
||
recover_memory_nested(); | ||
} | ||
|
||
|
@@ -222,7 +256,6 @@ template <typename F> | |
struct coupled_ode_system<F, var, double> { | ||
const F& f_; | ||
const std::vector<var>& y0_; | ||
const std::vector<double> y0_dbl_; | ||
const std::vector<double>& theta_dbl_; | ||
const std::vector<double>& x_; | ||
const std::vector<int>& x_int_; | ||
|
@@ -250,7 +283,6 @@ struct coupled_ode_system<F, var, double> { | |
const std::vector<int>& x_int, std::ostream* msgs) | ||
: f_(f), | ||
y0_(y0), | ||
y0_dbl_(value_of(y0)), | ||
theta_dbl_(theta), | ||
x_(x), | ||
x_int_(x_int), | ||
|
@@ -282,7 +314,7 @@ struct coupled_ode_system<F, var, double> { | |
try { | ||
start_nested(); | ||
|
||
vector<var> y_vars(z.begin(), z.begin() + N_); | ||
const vector<var> y_vars(z.begin(), z.begin() + N_); | ||
|
||
vector<var> dy_dt_vars = f_(t, y_vars, theta_dbl_, x_, x_int_, msgs_); | ||
|
||
|
@@ -338,7 +370,7 @@ struct coupled_ode_system<F, var, double> { | |
std::vector<double> initial_state() const { | ||
std::vector<double> initial(size_, 0.0); | ||
for (size_t i = 0; i < N_; i++) | ||
initial[i] = y0_dbl_[i]; | ||
initial[i] = value_of(y0_[i]); | ||
syclik marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for (size_t i = 0; i < N_; i++) | ||
initial[N_ + i * N_ + i] = 1.0; | ||
return initial; | ||
|
@@ -408,15 +440,32 @@ struct coupled_ode_system<F, var, double> { | |
* + M) states. (derivatives of each state with respect to each | ||
* initial value and each theta) | ||
* | ||
* <p>Note: For efficiency reasons the parameter vector is used as | ||
* part of the nested autodiff performed when evaluating the Jacobian | ||
* with respect to the parameters of the ODE RHS. This links the nested | ||
* autodiff with the outer global autodiff stack. At construction of | ||
* the coupled_ode_system the adjoints of the parameter vector are | ||
* saved. Upon destruction of the instance, these adjoints are | ||
* restored to their original values at instantiation of the | ||
* class. Throughout the life-time of the coupled_ode_system the | ||
* adjoints of the parameter vector, which is part of the surrounding | ||
* global autodiff stack, are used and modified. Thus, concurrent | ||
* access to the outer autodiff stack is unsafe while a | ||
* coupled_ode_system instance is in use. | ||
* | ||
* Finally, since the parameter vector is part of the outer autodiff | ||
* stack, the set_zero_all_adjoints_nested() call does not set these | ||
* adjoints to zero, which is why they are zeroed in an extra loop | ||
* following the set_zero_all_adjoints_nested() call. | ||
* | ||
* @tparam F the functor for the base ode system | ||
*/ | ||
template <typename F> | ||
struct coupled_ode_system<F, var, var> { | ||
const F& f_; | ||
const std::vector<var>& y0_; | ||
const std::vector<double> y0_dbl_; | ||
const std::vector<var>& theta_; | ||
const std::vector<double> theta_dbl_; | ||
std::vector<double> theta_adj_; | ||
const std::vector<double>& x_; | ||
const std::vector<int>& x_int_; | ||
const size_t N_; | ||
|
@@ -443,15 +492,25 @@ struct coupled_ode_system<F, var, var> { | |
const std::vector<int>& x_int, std::ostream* msgs) | ||
: f_(f), | ||
y0_(y0), | ||
y0_dbl_(value_of(y0)), | ||
theta_(theta), | ||
theta_dbl_(value_of(theta)), | ||
theta_adj_(theta.size()), | ||
x_(x), | ||
x_int_(x_int), | ||
N_(y0.size()), | ||
M_(theta.size()), | ||
size_(N_ + N_ * (N_ + M_)), | ||
msgs_(msgs) {} | ||
msgs_(msgs) { | ||
for (size_t j = 0; j < M_; ++j) { | ||
theta_adj_[j] = theta_[j].adj(); | ||
theta_[j].vi_->set_zero_adjoint(); | ||
} | ||
} | ||
|
||
~coupled_ode_system() { | ||
for (size_t j = 0; j < M_; ++j) { | ||
theta_[j].vi_->adj_ = theta_adj_[j]; | ||
} | ||
} | ||
|
||
/** | ||
* Populates the derivative vector with derivatives of the | ||
|
@@ -476,11 +535,9 @@ struct coupled_ode_system<F, var, var> { | |
try { | ||
start_nested(); | ||
|
||
vector<var> y_vars(z.begin(), z.begin() + N_); | ||
const vector<var> y_vars(z.begin(), z.begin() + N_); | ||
|
||
vector<var> theta_vars(theta_dbl_.begin(), theta_dbl_.end()); | ||
|
||
vector<var> dy_dt_vars = f_(t, y_vars, theta_vars, x_, x_int_, msgs_); | ||
vector<var> dy_dt_vars = f_(t, y_vars, theta_, x_, x_int_, msgs_); | ||
|
||
check_size_match("coupled_ode_system", "dz_dt", dy_dt_vars.size(), | ||
"states", N_); | ||
|
@@ -502,7 +559,7 @@ struct coupled_ode_system<F, var, var> { | |
} | ||
|
||
for (size_t j = 0; j < M_; j++) { | ||
double temp_deriv = theta_vars[j].adj(); | ||
double temp_deriv = theta_[j].adj(); | ||
const size_t offset = N_ + N_ * N_ + N_ * j; | ||
for (size_t k = 0; k < N_; k++) | ||
temp_deriv += z[offset + k] * y_vars[k].adj(); | ||
|
@@ -511,11 +568,18 @@ struct coupled_ode_system<F, var, var> { | |
} | ||
|
||
set_zero_all_adjoints_nested(); | ||
// Parameters stored on the outer (non-nested) nochain stack are not | ||
// reset to zero by the last call. This is done as a separate step here. | ||
// See efficiency note above on template specialization for more details | ||
// on this. | ||
for (size_t j = 0; j < M_; ++j) | ||
theta_[j].vi_->set_zero_adjoint(); | ||
} | ||
} catch (const std::exception& e) { | ||
recover_memory_nested(); | ||
throw; | ||
} | ||
|
||
recover_memory_nested(); | ||
} | ||
|
||
|
@@ -540,7 +604,7 @@ struct coupled_ode_system<F, var, var> { | |
std::vector<double> initial_state() const { | ||
std::vector<double> initial(size_, 0.0); | ||
for (size_t i = 0; i < N_; i++) | ||
initial[i] = y0_dbl_[i]; | ||
initial[i] = value_of(y0_[i]); | ||
for (size_t i = 0; i < N_; i++) | ||
initial[N_ + i * N_ + i] = 1.0; | ||
return initial; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand what you're trying to say here. Can you write that in plain words and maybe I can help?
It really seems like we should warn that this function cannot be used in parallel. Meaning that nothing can touch the autodiff stack while this function is in use. (Do I have that condition right?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is meant by "the adjoints of the parameter vector are used for Jacobian calculations"? And how does that interact with concurrency?