stan-dev · SteveBronder · Mar 4, 2020 · Mar 4, 2020 · Mar 29, 2020 · Mar 29, 2020
diff --git a/stan/math/laplace/block_matrix_sqrt.hpp b/stan/math/laplace/block_matrix_sqrt.hpp
@@ -0,0 +1,52 @@
+#ifndef STAN_MATH_LAPLACE_BLOCK_MATRIX_SQRT_HPP
+#define STAN_MATH_LAPLACE_BLOCK_MATRIX_SQRT_HPP
+
+#include <stan/math/mix.hpp>
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <unsupported/Eigen/MatrixFunctions>
+// REMOVE ME
+#include <iostream>
+
+namespace stan {
+namespace math {
+
+/**
+ * Return the matrix square-root for a block diagonal matrix.
+ */
+Eigen::SparseMatrix<double> inline block_matrix_sqrt(
+    Eigen::SparseMatrix<double> W, int block_size) {
+  int n_block = W.cols() / block_size;
+  Eigen::MatrixXd local_block(block_size, block_size);
+  Eigen::MatrixXd local_block_sqrt(block_size, block_size);
+  Eigen::SparseMatrix<double> W_root(W.rows(), W.cols());
+  W_root.reserve(Eigen::VectorXi::Constant(W_root.cols(), block_size));
+
+  // No block operation available for sparse matrices, so we have to loop.
+  // See https://eigen.tuxfamily.org/dox/group__TutorialSparse.html#title7
+  for (int i = 0; i < n_block; i++) {
+    for (int j = 0; j < block_size; j++) {
+      for (int k = 0; k < block_size; k++) {
+        local_block(j, k) = W.coeffRef(i * block_size + j, i * block_size + k);
+      }
+    }
+
+    local_block_sqrt = local_block.sqrt();
+    // local_block_sqrt = cholesky_decompose(local_block);
+
+    for (int j = 0; j < block_size; j++) {
+      for (int k = 0; k < block_size; k++) {
+        W_root.insert(i * block_size + j, i * block_size + k)
+            = local_block_sqrt(j, k);
+      }
+    }
+  }
+
+  W_root.makeCompressed();
+
+  return W_root;
+}
+
+}  // namespace math
+}  // namespace stan
+
+#endif
diff --git a/stan/math/laplace/hessian_block_diag.hpp b/stan/math/laplace/hessian_block_diag.hpp
@@ -0,0 +1,86 @@
+#ifndef STAN_MATH_LAPLACE_HESSIAN_BLOCK_DIAG_HPP
+#define STAN_MATH_LAPLACE_HESSIAN_BLOCK_DIAG_HPP
+
+// TODO: refine include.
+#include <stan/math/mix.hpp>
+#include <stan/math/laplace/hessian_times_vector.hpp>
+#include <Eigen/Sparse>
+
+namespace stan {
+namespace math {
+
+/**
+ * Returns a block diagonal Hessian by computing the relevant directional
+ * derivatives and storing them in a matrix.
+ * For m the size of each block, the operations const m calls to
+ * hessian_times_vector, that is m forward sweeps and m reverse sweeps.
+ */
+template <typename F>
+inline void hessian_block_diag(const F& f, const Eigen::VectorXd& x,
+                               const Eigen::VectorXd& eta,
+                               const Eigen::VectorXd& delta,
+                               const std::vector<int>& delta_int,
+                               int hessian_block_size, double& fx,
+                               Eigen::MatrixXd& H, std::ostream* pstream = 0) {
+  using Eigen::MatrixXd;
+  using Eigen::VectorXd;
+
+  int x_size = x.size();
+  VectorXd v;
+  H = MatrixXd::Zero(x_size, x_size);
+  int n_blocks = x_size / hessian_block_size;
+  for (int i = 0; i < hessian_block_size; ++i) {
+    v = VectorXd::Zero(x_size);
+    for (int j = i; j < x_size; j += hessian_block_size)
+      v(j) = 1;
+    VectorXd Hv;
+    hessian_times_vector(f, x, eta, delta, delta_int, v, fx, Hv, pstream);
+    for (int j = 0; j < n_blocks; ++j) {
+      for (int k = 0; k < hessian_block_size; ++k)
+        H(k + j * hessian_block_size, i + j * hessian_block_size)
+            = Hv(k + j * hessian_block_size);
+    }
+  }
+}
+
+/**
+ * Overload for case where hessian is stored as a sparse matrix.
+ */
+template <typename F>
+inline void hessian_block_diag(const F& f, const Eigen::VectorXd& x,
+                               const Eigen::VectorXd& eta,
+                               const Eigen::VectorXd& delta,
+                               const std::vector<int>& delta_int,
+                               int hessian_block_size, double& fx,
+                               Eigen::SparseMatrix<double>& H,
+                               // Eigen::MatrixXd& H,
+                               std::ostream* pstream = 0) {
+  using Eigen::MatrixXd;
+  using Eigen::VectorXd;
+
+  int x_size = x.size();
+  VectorXd v;
+  // H = MatrixXd::Zero(x_size, x_size);
+  H.resize(x_size, x_size);
+  // H.reserve(Eigen::VectorXi::Constant(x_size, hessian_block_size));
+
+  int n_blocks = x_size / hessian_block_size;
+  for (int i = 0; i < hessian_block_size; ++i) {
+    v = VectorXd::Zero(x_size);
+    for (int j = i; j < x_size; j += hessian_block_size)
+      v(j) = 1;
+    VectorXd Hv;
+    hessian_times_vector(f, x, eta, delta, delta_int, v, fx, Hv, pstream);
+    for (int j = 0; j < n_blocks; ++j) {
+      for (int k = 0; k < hessian_block_size; ++k) {
+        H.insert(k + j * hessian_block_size, i + j * hessian_block_size)
+            = Hv(k + j * hessian_block_size);
+      }
+    }
+  }
+}
+
+}  // namespace math
+}  // namespace stan
+
+#endif
diff --git a/stan/math/laplace/hessian_times_vector.hpp b/stan/math/laplace/hessian_times_vector.hpp
@@ -0,0 +1,44 @@
+#ifndef STAN_MATH_LAPLACE_HESSIAN_TIMES_VECTOR_HPP
+#define STAN_MATH_LAPLACE_HESSIAN_TIMES_VECTOR_HPP
+
+// TODO: refine include.
+#include <stan/math/mix.hpp>
+
+namespace stan {
+namespace math {
+
+/**
+ * Overload Hessian_times_vector function, under stan/math/mix/functor
+ * to handle functions which take in arguments eta, delta, delta_int,
+ * and pstream.
+ */
+template <typename F>
+inline void hessian_times_vector(const F& f, const Eigen::VectorXd& x,
+                                 const Eigen::VectorXd& eta,
+                                 const Eigen::VectorXd& delta,
+                                 const std::vector<int>& delta_int,
+                                 const Eigen::VectorXd& v, double& fx,
+                                 Eigen::VectorXd& Hv,
+                                 std::ostream* pstream = 0) {
+  using Eigen::Dynamic;
+  using Eigen::Matrix;
+
+  nested_rev_autodiff nested;
+
+  int x_size = x.size();
+  Matrix<var, Dynamic, 1> x_var = x;
+  Matrix<fvar<var>, Dynamic, 1> x_fvar(x_size);
+  for (int i = 0; i < x_size; i++) {
+    x_fvar(i) = fvar<var>(x_var(i), v(i));
+  }
+  fvar<var> fx_fvar = f(x_fvar, eta, delta, delta_int, pstream);
+  grad(fx_fvar.d_.vi_);
+  Hv.resize(x_size);
+  for (int i = 0; i < x_size; i++)
+    Hv(i) = x_var(i).adj();
+}
+
+}  // namespace math
+}  // namespace stan
+
+#endif
diff --git a/stan/math/laplace/laplace.hpp b/stan/math/laplace/laplace.hpp
@@ -0,0 +1,26 @@
+#ifndef STAN_MATH_LAPLACE_LAPLACE_HPP
+#define STAN_MATH_LAPLACE_LAPLACE_HPP
+
+#include <stan/math/mix.hpp>
+#include <stan/math/laplace/block_matrix_sqrt.hpp>
+#include <stan/math/laplace/hessian_block_diag.hpp>
+#include <stan/math/laplace/hessian_times_vector.hpp>
+#include <stan/math/laplace/laplace_likelihood_bernoulli_logit.hpp>
+#include <stan/math/laplace/laplace_likelihood_general.hpp>
+#include <stan/math/laplace/laplace_likelihood_poisson_log.hpp>
+#include <stan/math/laplace/laplace_likelihood_neg_binomial_2_log.hpp>
+#include <stan/math/laplace/laplace_marginal.hpp>
+#include <stan/math/laplace/laplace_marginal_neg_binomial_2.hpp>
+#include <stan/math/laplace/laplace_likelihood_deprecated.hpp>
+#include <stan/math/laplace/laplace_marginal_bernoulli_logit_lpmf.hpp>
+#include <stan/math/laplace/laplace_marginal_lpdf.hpp>
+#include <stan/math/laplace/laplace_marginal_poisson_log_lpmf.hpp>
+#include <stan/math/laplace/laplace_pseudo_target.hpp>
+#include <stan/math/laplace/third_diff_directional.hpp>
+#include <stan/math/laplace/partial_diff_theta.hpp>
+#include <stan/math/laplace/prob/laplace_base_rng.hpp>
+#include <stan/math/laplace/prob/laplace_bernoulli_logit_rng.hpp>
+#include <stan/math/laplace/prob/laplace_poisson_log_rng.hpp>
+#include <stan/math/laplace/prob/laplace_rng.hpp>
+
+#endif
diff --git a/stan/math/laplace/laplace_likelihood_bernoulli_logit.hpp b/stan/math/laplace/laplace_likelihood_bernoulli_logit.hpp
@@ -0,0 +1,114 @@
+#ifndef STAN_MATH_LAPLACE_LAPLACE_LIKELIHOOD_BERNOULLI_LOGIT_HPP
+#define STAN_MATH_LAPLACE_LAPLACE_LIKELIHOOD_BERNOULLI_LOGIT_HPP
+
+namespace stan {
+namespace math {
+
+/**
+ * A structure to compute the log density, first, second,
+ * and third-order derivatives for a Bernoulli logistic likelihood
+ * whith multiple groups.
+ * This structure can be passed to the the laplace_marginal function.
+ * Uses sufficient statistics for the data.
+ */
+struct diff_bernoulli_logit {
+  /* The number of samples in each group. */
+  Eigen::VectorXd n_samples_;
+  /* The sum of counts in each group. */
+  Eigen::VectorXd sums_;
+
+  diff_bernoulli_logit(const Eigen::VectorXd& n_samples,
+                       const Eigen::VectorXd& sums)
+      : n_samples_(n_samples), sums_(sums) {}
+
+  /**
+   * Return the log density.
+   * @tparam T type of the log poisson parameter.
+   * @param[in] theta log poisson parameters for each group.
+   * @return the log density.
+   */
+  template <typename T1, typename T2>
+  T1 log_likelihood(
+      const Eigen::Matrix<T1, Eigen::Dynamic, 1>& theta,
+      const Eigen::Matrix<T2, Eigen::Dynamic, 1>& eta_dummy) const {
+    Eigen::VectorXd one = rep_vector(1, theta.size());
+    return sum(theta.cwiseProduct(sums_)
+               - n_samples_.cwiseProduct(log(one + exp(theta))));
+  }
+
+  /**
+   * Returns the gradient of the log density, and the hessian.
+   * Since the latter is diagonal, it is stored inside a vector.
+   * The two objects are computed together, because we always use
+   * both when solving the Newton iteration of the Laplace
+   * approximation, and to avoid redundant computation.
+   * @tparam T type of the Bernoulli logistic parameter.
+   * @param[in] theta Bernoulli logistic parameters for each group.
+   * @param[in, out] gradient
+   * @param[in, out] hessian diagonal, so stored in a vector.
+   */
+  template <typename T1, typename T2>
+  void diff(const Eigen::Matrix<T1, Eigen::Dynamic, 1>& theta,
+            const Eigen::Matrix<T2, Eigen::Dynamic, 1>& eta_dummy,
+            Eigen::Matrix<T1, Eigen::Dynamic, 1>& gradient,
+            Eigen::SparseMatrix<double>& hessian,
+            int block_size_dummy = 0) const {
+    Eigen::Matrix<T1, Eigen::Dynamic, 1> exp_theta = exp(theta);
+    int theta_size = theta.size();
+    Eigen::VectorXd one = rep_vector(1, theta_size);
+
+    gradient = sums_ - n_samples_.cwiseProduct(inv_logit(theta));
+
+    Eigen::Matrix<T1, Eigen::Dynamic, 1> hessian_diagonal
+        = -n_samples_.cwiseProduct(
+            elt_divide(exp_theta, square(one + exp_theta)));
+    hessian.resize(theta_size, theta_size);
+    hessian.reserve(Eigen::VectorXi::Constant(theta_size, 1));
+    for (int i = 0; i < theta_size; i++)
+      hessian.insert(i, i) = hessian_diagonal(i);
+  }
+
+  /**
+   * Returns the third derivative tensor. Because it is (cubic) diagonal,
+   * the object is stored in a vector.
+   * @tparam T type of the log poisson parameter.
+   * @param[in] theta log poisson parameters for each group.
+   * @param[in] eta_dummy additional likelihood parameters (used for other lk)
+   * @return A vector containing the non-zero elements of the third
+   *         derivative tensor.
+   */
+  template <typename T1, typename T2>
+  Eigen::Matrix<T1, Eigen::Dynamic, 1> third_diff(
+      const Eigen::Matrix<T1, Eigen::Dynamic, 1>& theta,
+      const Eigen::Matrix<T2, Eigen::Dynamic, 1>& eta_dummy) const {
+    Eigen::VectorXd exp_theta = exp(theta);
+    Eigen::VectorXd one = rep_vector(1, theta.size());
+    Eigen::VectorXd nominator = exp_theta.cwiseProduct(exp_theta - one);
+    Eigen::VectorXd denominator
+        = square(one + exp_theta).cwiseProduct(one + exp_theta);
+
+    return n_samples_.cwiseProduct(elt_divide(nominator, denominator));
+  }
+
+  Eigen::VectorXd compute_s2(const Eigen::VectorXd& theta,
+                             const Eigen::VectorXd& eta,
+                             const Eigen::MatrixXd& A,
+                             int hessian_block_size) const {
+    std::cout << "THIS FUNCTIONS SHOULD NEVER GET CALLED!" << std::endl;
+    Eigen::MatrixXd void_matrix;
+    return void_matrix;
+  }
+
+  Eigen::VectorXd diff_eta_implicit(const Eigen::VectorXd& v,
+                                    const Eigen::VectorXd& theta,
+                                    const Eigen::VectorXd& eta) const {
+    std::cout << "THIS FUNCTIONS SHOULD NEVER GET CALLED!" << std::endl;
+    Eigen::MatrixXd void_matrix;
+    return void_matrix;
+  }
+};
+
+}  // namespace math
+}  // namespace stan
+
+#endif