stan-dev · wds15 · May 10, 2019 · Nov 7, 2018 · Nov 7, 2018 · Nov 17, 2018
diff --git a/stan/math/rev/arr/functor/coupled_ode_system.hpp b/stan/math/rev/arr/functor/coupled_ode_system.hpp
@@ -37,6 +37,20 @@ namespace math {
  * to the second base system equation, and so on through the last base
  * system equation.
  *
+ * <p>Note: Calculating the sensitivity system requires the Jacobian
+ * of the base ODE RHS wrt to the parameters theta. The parameter
+ * vector theta is constant for successive calls to the exposed
+ * operator(). For this reason, the parameter vector theta is copied
+ * upon construction onto the nochain var autodiff tape which is used
+ * in the the nested autodiff performed in the operator() of this
+ * adaptor. Doing so reduces the size of the nested autodiff and
+ * speeds up autodiff. As a side effect, the parameter vector theta
+ * will remain on the nochain autodiff part of the autodiff tape being
+ * in use even after destruction of the given instance. Moreover, the
+ * adjoint zeroing for the nested system does not cover the theta
+ * parameter vector part of the nochain autodiff tape and is therefore
+ * set to zero using a dedicated loop.
+ *
  * @tparam F base ode system functor. Must provide
  *   <code>operator()(double t, std::vector<double> y, std::vector<var> theta,
  *          std::vector<double> x, std::vector<int>x_int, std::ostream*
@@ -47,7 +61,7 @@ struct coupled_ode_system<F, double, var> {
   const F& f_;
   const std::vector<double>& y0_dbl_;
   const std::vector<var>& theta_;
-  const std::vector<double> theta_dbl_;
+  std::vector<var> theta_nochain_;
   const std::vector<double>& x_;
   const std::vector<int>& x_int_;
   const size_t N_;
@@ -74,13 +88,15 @@ struct coupled_ode_system<F, double, var> {
       : f_(f),
         y0_dbl_(y0),
         theta_(theta),
-        theta_dbl_(value_of(theta)),
         x_(x),
         x_int_(x_int),
         N_(y0.size()),
         M_(theta.size()),
         size_(N_ + N_ * M_),
-        msgs_(msgs) {}
+        msgs_(msgs) {
+    for (const var& p : theta)
+      theta_nochain_.emplace_back(var(new vari(p.val(), false)));
+  }
 
   /**
    * Calculates the derivative of the coupled ode system with respect
@@ -103,11 +119,9 @@ struct coupled_ode_system<F, double, var> {
     try {
       start_nested();
 
-      vector<var> y_vars(z.begin(), z.begin() + N_);
-
-      vector<var> theta_vars(theta_dbl_.begin(), theta_dbl_.end());
+      const vector<var> y_vars(z.begin(), z.begin() + N_);
 
-      vector<var> dy_dt_vars = f_(t, y_vars, theta_vars, x_, x_int_, msgs_);
+      vector<var> dy_dt_vars = f_(t, y_vars, theta_nochain_, x_, x_int_, msgs_);
 
       check_size_match("coupled_ode_system", "dz_dt", dy_dt_vars.size(),
                        "states", N_);
@@ -120,7 +134,7 @@ struct coupled_ode_system<F, double, var> {
           // orders derivatives by equation (i.e. if there are 2 eqns
           // (y1, y2) and 2 parameters (a, b), dy_dt will be ordered as:
           // dy1_dt, dy2_dt, dy1_da, dy2_da, dy1_db, dy2_db
-          double temp_deriv = theta_vars[j].adj();
+          double temp_deriv = theta_nochain_[j].adj();
           const size_t offset = N_ + N_ * j;
           for (size_t k = 0; k < N_; k++)
             temp_deriv += z[offset + k] * y_vars[k].adj();
@@ -129,6 +143,12 @@ struct coupled_ode_system<F, double, var> {
         }
 
         set_zero_all_adjoints_nested();
+        // Parameters stored on the outer (non-nested) nochain stack are not
+        // reset to zero by the last call. This is done as a separate step here.
+        // See efficiency note above on template specalization for more details
+        // on this.
+        for (size_t j = 0; j < M_; ++j)
+          theta_nochain_[j].vi_->set_zero_adjoint();
       }
     } catch (const std::exception& e) {
       recover_memory_nested();
@@ -226,7 +246,6 @@ template <typename F>
 struct coupled_ode_system<F, var, double> {
   const F& f_;
   const std::vector<var>& y0_;
-  const std::vector<double> y0_dbl_;
   const std::vector<double>& theta_dbl_;
   const std::vector<double>& x_;
   const std::vector<int>& x_int_;
@@ -253,7 +272,6 @@ struct coupled_ode_system<F, var, double> {
                      const std::vector<int>& x_int, std::ostream* msgs)
       : f_(f),
         y0_(y0),
-        y0_dbl_(value_of(y0)),
         theta_dbl_(theta),
         x_(x),
         x_int_(x_int),
@@ -283,7 +301,7 @@ struct coupled_ode_system<F, var, double> {
     try {
       start_nested();
 
-      vector<var> y_vars(z.begin(), z.begin() + N_);
+      const vector<var> y_vars(z.begin(), z.begin() + N_);
 
       vector<var> dy_dt_vars = f_(t, y_vars, theta_dbl_, x_, x_int_, msgs_);
 
@@ -339,7 +357,7 @@ struct coupled_ode_system<F, var, double> {
   std::vector<double> initial_state() const {
     std::vector<double> initial(size_, 0.0);
     for (size_t i = 0; i < N_; i++)
-      initial[i] = y0_dbl_[i];
+      initial[i] = value_of(y0_[i]);
     for (size_t i = 0; i < N_; i++)
       initial[N_ + i * N_ + i] = 1.0;
     return initial;
@@ -406,6 +424,20 @@ struct coupled_ode_system<F, var, double> {
  * parameters with respect to the second base system equation, and
  * so on through the last base system equation.
  *
+ * <p>Note: Calculating the sensitivity system requires the Jacobian
+ * of the base ODE RHS wrt to the parameters theta. The parameter
+ * vector theta is constant for successive calls to the exposed
+ * operator(). For this reason, the parameter vector theta is copied
+ * upon construction onto the nochain var autodiff tape which is used
+ * in the the nested autodiff performed in the operator() of this
+ * adaptor. Doing so reduces the size of the nested autodiff and
+ * speeds up autodiff. As a side effect, the parameter vector theta
+ * will remain on the nochain autodiff part of the autodiff tape being
+ * in use even after destruction of the given instance. Moreover, the
+ * adjoint zeroing for the nested system does not cover the theta
+ * parameter vector part of the nochain autodiff tape and is therefore
+ * set to zero using a dedicated loop.
+ *
  * @tparam F base ode system functor. Must provide
  *   <code>operator()(double t, std::vector<var> y, std::vector<var> theta,
  *          std::vector<double> x, std::vector<int>x_int, std::ostream*
@@ -415,9 +447,8 @@ template <typename F>
 struct coupled_ode_system<F, var, var> {
   const F& f_;
   const std::vector<var>& y0_;
-  const std::vector<double> y0_dbl_;
   const std::vector<var>& theta_;
-  const std::vector<double> theta_dbl_;
+  std::vector<var> theta_nochain_;
   const std::vector<double>& x_;
   const std::vector<int>& x_int_;
   const size_t N_;
@@ -443,15 +474,16 @@ struct coupled_ode_system<F, var, var> {
                      const std::vector<int>& x_int, std::ostream* msgs)
       : f_(f),
         y0_(y0),
-        y0_dbl_(value_of(y0)),
         theta_(theta),
-        theta_dbl_(value_of(theta)),
         x_(x),
         x_int_(x_int),
         N_(y0.size()),
         M_(theta.size()),
         size_(N_ + N_ * (N_ + M_)),
-        msgs_(msgs) {}
+        msgs_(msgs) {
+    for (const var& p : theta)
+      theta_nochain_.emplace_back(var(new vari(p.val(), false)));
+  }
 
   /**
    * Calculates the derivative of the coupled ode system with respect
@@ -474,11 +506,9 @@ struct coupled_ode_system<F, var, var> {
     try {
       start_nested();
 
-      vector<var> y_vars(z.begin(), z.begin() + N_);
-
-      vector<var> theta_vars(theta_dbl_.begin(), theta_dbl_.end());
+      const vector<var> y_vars(z.begin(), z.begin() + N_);
 
-      vector<var> dy_dt_vars = f_(t, y_vars, theta_vars, x_, x_int_, msgs_);
+      vector<var> dy_dt_vars = f_(t, y_vars, theta_nochain_, x_, x_int_, msgs_);
 
       check_size_match("coupled_ode_system", "dz_dt", dy_dt_vars.size(),
                        "states", N_);
@@ -500,7 +530,7 @@ struct coupled_ode_system<F, var, var> {
         }
 
         for (size_t j = 0; j < M_; j++) {
-          double temp_deriv = theta_vars[j].adj();
+          double temp_deriv = theta_nochain_[j].adj();
           const size_t offset = N_ + N_ * N_ + N_ * j;
           for (size_t k = 0; k < N_; k++)
             temp_deriv += z[offset + k] * y_vars[k].adj();
@@ -509,6 +539,12 @@ struct coupled_ode_system<F, var, var> {
         }
 
         set_zero_all_adjoints_nested();
+        // Parameters stored on the outer (non-nested) nochain stack are not
+        // reset to zero by the last call. This is done as a separate step here.
+        // See efficiency note above on template specalization for more details
+        // on this.
+        for (size_t j = 0; j < M_; ++j)
+          theta_nochain_[j].vi_->set_zero_adjoint();
       }
     } catch (const std::exception& e) {
       recover_memory_nested();
@@ -545,7 +581,7 @@ struct coupled_ode_system<F, var, var> {
   std::vector<double> initial_state() const {
     std::vector<double> initial(size_, 0.0);
     for (size_t i = 0; i < N_; i++)
-      initial[i] = y0_dbl_[i];
+      initial[i] = value_of(y0_[i]);
     for (size_t i = 0; i < N_; i++)
       initial[N_ + i * N_ + i] = 1.0;
     return initial;

diff --git a/stan/math/rev/core/var.hpp b/stan/math/rev/core/var.hpp
@@ -21,7 +21,7 @@ static void grad(vari* vi);
  * Independent (input) and dependent (output) variables for gradients.
  *
  * This class acts as a smart pointer, with resources managed by
- * an agenda-based memory manager scoped to a single gradient
+ * an arena-based memory manager scoped to a single gradient
  * calculation.
  *
  * An var is constructed with a double and used like any

diff --git a/stan/math/rev/mat/functor/cvodes_ode_data.hpp b/stan/math/rev/mat/functor/cvodes_ode_data.hpp
@@ -148,11 +148,11 @@ class cvodes_ode_data {
    */
   inline void rhs(double t, const double y[], double dy_dt[]) const {
     const std::vector<double> y_vec(y, y + N_);
-    const std::vector<double> dy_dt_vec
+    const std::vector<double>& dy_dt_vec
         = f_(t, y_vec, theta_dbl_, x_, x_int_, msgs_);
     check_size_match("cvodes_ode_data", "dz_dt", dy_dt_vec.size(), "states",
                      N_);
-    std::copy(dy_dt_vec.begin(), dy_dt_vec.end(), dy_dt);
+    std::move(dy_dt_vec.begin(), dy_dt_vec.end(), dy_dt);
   }
 
   /**
@@ -163,14 +163,13 @@ class cvodes_ode_data {
    * y to be the initial of the coupled ode system.
    */
   inline int jacobian_states(double t, const double y[], SUNMatrix J) const {
-    const std::vector<double> y_vec(y, y + N_);
     start_nested();
-    std::vector<var> y_vec_var(y_vec.begin(), y_vec.end());
+    const std::vector<var> y_vec_var(y, y + N_);
     coupled_ode_system<F, var, double> ode_jacobian(f_, y_vec_var, theta_dbl_,
                                                     x_, x_int_, msgs_);
-    std::vector<double> jacobian_y(ode_jacobian.size(), 0);
+    std::vector<double>&& jacobian_y = std::vector<double>(ode_jacobian.size());
     ode_jacobian(ode_jacobian.initial_state(), jacobian_y, t);
-    std::copy(jacobian_y.begin() + N_, jacobian_y.end(), SM_DATA_D(J));
+    std::move(jacobian_y.begin() + N_, jacobian_y.end(), SM_DATA_D(J));
     recover_memory_nested();
     return 0;
   }
@@ -184,14 +183,14 @@ class cvodes_ode_data {
   inline void rhs_sens(double t, const double y[], N_Vector* yS,
                        N_Vector* ySdot) const {
     std::vector<double> z(coupled_state_.size());
-    std::vector<double> dz_dt(coupled_state_.size());
+    std::vector<double>&& dz_dt = std::vector<double>(coupled_state_.size());
     std::copy(y, y + N_, z.begin());
     for (std::size_t s = 0; s < S_; s++)
       std::copy(NV_DATA_S(yS[s]), NV_DATA_S(yS[s]) + N_,
                 z.begin() + (s + 1) * N_);
     coupled_ode_(z, dz_dt, t);
     for (std::size_t s = 0; s < S_; s++)
-      std::copy(dz_dt.begin() + (s + 1) * N_, dz_dt.begin() + (s + 2) * N_,
+      std::move(dz_dt.begin() + (s + 1) * N_, dz_dt.begin() + (s + 2) * N_,
                 NV_DATA_S(ySdot[s]));
   }
 };

diff --git a/test/unit/math/rev/arr/functor/coupled_ode_system_test.cpp b/test/unit/math/rev/arr/functor/coupled_ode_system_test.cpp
@@ -18,6 +18,8 @@ struct StanAgradRevOde : public ::testing::Test {
 TEST_F(StanAgradRevOde, coupled_ode_system_dv) {
   using stan::math::coupled_ode_system;
 
+  stan::math::start_nested();
+
   harm_osc_ode_fun harm_osc;
 
   std::vector<stan::math::var> theta;
@@ -39,15 +41,22 @@ TEST_F(StanAgradRevOde, coupled_ode_system_dv) {
   z0.push_back(1.0);
   z0.push_back(2.0);
 
+  std::size_t stack_size = stan::math::nested_size();
+
   coupled_ode_system<harm_osc_ode_fun, double, stan::math::var> system(
       harm_osc, y0, theta, x, x_int, &msgs);
 
+  EXPECT_EQ(stack_size, stan::math::nested_size())
+      << "expecting no new things on the stack";
+
   system(z0, dz_dt, t0);
 
   EXPECT_FLOAT_EQ(0.5, dz_dt[0]);
   EXPECT_FLOAT_EQ(-1.075, dz_dt[1]);
   EXPECT_FLOAT_EQ(2, dz_dt[2]);
   EXPECT_FLOAT_EQ(-1.8, dz_dt[3]);
+
+  stan::math::recover_memory_nested();
 }
 TEST_F(StanAgradRevOde, decouple_states_dv) {
   using stan::math::coupled_ode_system;
@@ -193,6 +202,8 @@ TEST_F(StanAgradRevOde, memory_recovery_exception_dv) {
 TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
   using stan::math::coupled_ode_system;
 
+  stan::math::start_nested();
+
   harm_osc_ode_fun harm_osc;
 
   std::vector<double> theta;
@@ -219,9 +230,14 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
   y0_var.push_back(1.0);
   y0_var.push_back(0.5);
 
+  std::size_t stack_size = stan::math::nested_size();
+
   coupled_ode_system<harm_osc_ode_fun, stan::math::var, double> system(
       harm_osc, y0_var, theta, x, x_int, &msgs);
 
+  EXPECT_EQ(stack_size, stan::math::nested_size())
+      << "expecting no new things on the stack";
+
   system(z0, dz_dt, t0);
 
   EXPECT_FLOAT_EQ(0.5, dz_dt[0]);
@@ -230,6 +246,8 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vd) {
   EXPECT_FLOAT_EQ(-1.0 * 1.0 - 0.15 * 0.0, dz_dt[3]);
   EXPECT_FLOAT_EQ(0.0 * 0.0 + 1.0 * 1.0, dz_dt[4]);
   EXPECT_FLOAT_EQ(-1.0 * 0.0 - 0.15 * 1.0, dz_dt[5]);
+
+  stan::math::recover_memory_nested();
 }
 TEST_F(StanAgradRevOde, decouple_states_vd) {
   using stan::math::coupled_ode_system;
@@ -374,6 +392,7 @@ TEST_F(StanAgradRevOde, memory_recovery_exception_vd) {
 TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
   using stan::math::coupled_ode_system;
 
+  stan::math::start_nested();
   const size_t N = 2;
   const size_t M = 1;
   const size_t z_size = N + N * N + N * M;
@@ -386,9 +405,15 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
   theta_var.push_back(0.15);
 
   harm_osc_ode_fun harm_osc;
+
+  std::size_t stack_size = stan::math::nested_size();
+
   coupled_ode_system<harm_osc_ode_fun, stan::math::var, stan::math::var> system(
       harm_osc, y0_var, theta_var, x, x_int, &msgs);
 
+  EXPECT_EQ(stack_size, stan::math::nested_size())
+      << "expecting no new things on the stack";
+
   std::vector<double> z0(z_size, 0);
   z0[0] = 1.0;
   z0[1] = 0.5;
@@ -419,6 +444,8 @@ TEST_F(StanAgradRevOde, coupled_ode_system_vv) {
   EXPECT_FLOAT_EQ(-0.15, dz_dt[5]);
   EXPECT_FLOAT_EQ(0, dz_dt[6]);
   EXPECT_FLOAT_EQ(-0.5, dz_dt[7]);
+
+  stan::math::recover_memory_nested();
 }
 TEST_F(StanAgradRevOde, decouple_states_vv) {
   using stan::math::coupled_ode_system;