Lots of updates (#416)

* incrementing dev version * updating news * Allow different initial values across particles also when using pairwise preference data (#406) * simplifications * used Cpp for all_topological_sorts. Much much faster, since it is recursive code * converted preferences to matrix for SMC * starting to set up preferences * done * styling * removed shuffle_unranked argument * restructured arguments to setup_rank_data for pairwise preferences * fixing some errors * removing unnecessary statement * fixed bug in augmented rankings for existing users * made a better progress reporter * updated news and description * fixed #407 (#408) * Fixing bug in Ulam distance (#411) * fixed bug and added test * simplifying * styling * Exporting exact partition function (#412) * fixing documentation typo * fixing #409 * Consistency checks with pairwise preferences (#414) * fixed issue with updated users with pairwise preferences * fixed #404 * improved cpp code for topological sorts * incrementing dev version * generating all topological sorts in random order * adding save=TRUE argument where necessary * incrementing and updating * updating cran comments * fixing a long-running example * fixing CodeFactor issues
ocbe-uio · Apr 19, 2024 · 49d459d · 49d459d
1 parent ad4b935
commit 49d459d
Show file tree

Hide file tree

Showing 15 changed files with 63 additions and 48 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: BayesMallows
 Type: Package
 Title: Bayesian Preference Learning with the Mallows Rank Model
-Version: 2.1.1.9007
+Version: 2.2.0
 Authors@R: c(person("Oystein", "Sorensen",
                     email = "oystein.sorensen.1985@gmail.com",
                     role = c("aut", "cre"),

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,7 @@
-# BayesMallows (development versions)
+# BayesMallows 2.2.0
 
+* For initialization of latent ranks when using pairwise preference data, all
+  topological sorts are now generated in random order.
 * The SMC function now checks for consistency with previous latent ranks for
   existing users also when data arrive in the form of pairwise preferences.
 * A function compute_exact_partition_function() is now added, which returns the

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -5,8 +5,8 @@ abind <- function(x, y) {
     .Call(`_BayesMallows_abind`, x, y)
 }
 
-all_topological_sorts <- function(prefs, n_items, maxit = 1000L) {
-    .Call(`_BayesMallows_all_topological_sorts`, prefs, n_items, maxit)
+all_topological_sorts <- function(prefs, n_items, maxit, save) {
+    .Call(`_BayesMallows_all_topological_sorts`, prefs, n_items, maxit, save)
 }
 
 #' Asymptotic Approximation of Partition Function

diff --git a/R/estimate_partition_function.R b/R/estimate_partition_function.R
@@ -122,7 +122,7 @@ extract_pfun_values <- function(metric, n_items, pfun_estimate) {
           "estimate in argument pfun_estimate."
         )
       } else {
-        return(NULL)
+        NULL
       }
     }
   )

diff --git a/R/generate_initial_ranking.R b/R/generate_initial_ranking.R
@@ -42,7 +42,7 @@ generate_initial_ranking.BayesMallowsIntransitive <- function(
 }
 
 create_ranks <- function(mat, n_items, max_topological_sorts) {
-  ret <- all_topological_sorts(mat, n_items, max_topological_sorts)
+  ret <- all_topological_sorts(mat, n_items, max_topological_sorts, TRUE)
   u <- sample(min(max_topological_sorts, nrow(ret)), 1)
   ret <- ret[u, ]
   all_items <- seq(from = 1, to = n_items, by = 1)

diff --git a/R/generate_transitive_closure.R b/R/generate_transitive_closure.R
@@ -53,7 +53,5 @@ generate_transitive_closure <- function(preferences, cl = NULL) {
     bottom_item = row_inds[new_mat[, 1, drop = FALSE]],
     top_item = row_inds[new_mat[, 2, drop = FALSE]]
   )
-
-
-  return(result)
+  result
 }
diff --git a/cran-comments.md b/cran-comments.md
@@ -1,15 +1,13 @@
 ## Resubmission Note
 
-This is a resubmission, fixing the gcc-UBSAN error reported on CRAN.
+This is a resubmission containing a large number of new features.
 
 
 ## Test Environments
 
-* Local Ubuntu 22.04, R 4.3.2, running R CMD check with --use-valgrind option.
 * Local Ubuntu 23.04 with R 4.3.2 built from source with option "--with-valgrind-instrumentation=2", running R CMD check with --use-valgrind option.
-* r-devel-san via rocker/r-devel-san, running R CMD check with --use-valgrind option.
 * r-devel-san via rocker/r-devel-san.
-* local Windows install, R 4.3.2.
+* local Windows install, R 4.3.3.
 * windows, devel, release and old-release.
 * R-CMD-check via GitHub Actions on windows-latest, macOS-latest, ubuntu-20.04 (release), and ubuntu-20.04 (devel).
 * M1 builder.

diff --git a/inst/examples/update_mallows_example.R b/inst/examples/update_mallows_example.R
@@ -1,3 +1,4 @@
+\dontrun{
 set.seed(1)
 # UPDATING A MALLOWS MODEL WITH NEW COMPLETE RANKINGS
 # Assume we first only observe the first four rankings in the potato_visual
@@ -126,3 +127,4 @@ for(i in 21:24){
 # MCMC steps, and the latent sampling lag.
 plot(mod)
 compute_consensus(mod)
+}
diff --git a/man/plot.SMCMallows.Rd b/man/plot.SMCMallows.Rd
diff --git a/man/update_mallows.Rd b/man/update_mallows.Rd
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -24,15 +24,16 @@ BEGIN_RCPP
 END_RCPP
 }
 // all_topological_sorts
-arma::imat all_topological_sorts(arma::imat prefs, int n_items, int maxit);
-RcppExport SEXP _BayesMallows_all_topological_sorts(SEXP prefsSEXP, SEXP n_itemsSEXP, SEXP maxitSEXP) {
+arma::imat all_topological_sorts(arma::imat prefs, int n_items, int maxit, bool save);
+RcppExport SEXP _BayesMallows_all_topological_sorts(SEXP prefsSEXP, SEXP n_itemsSEXP, SEXP maxitSEXP, SEXP saveSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< arma::imat >::type prefs(prefsSEXP);
     Rcpp::traits::input_parameter< int >::type n_items(n_itemsSEXP);
     Rcpp::traits::input_parameter< int >::type maxit(maxitSEXP);
-    rcpp_result_gen = Rcpp::wrap(all_topological_sorts(prefs, n_items, maxit));
+    Rcpp::traits::input_parameter< bool >::type save(saveSEXP);
+    rcpp_result_gen = Rcpp::wrap(all_topological_sorts(prefs, n_items, maxit, save));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -164,7 +165,7 @@ END_RCPP
 
 static const R_CallMethodDef CallEntries[] = {
     {"_BayesMallows_abind", (DL_FUNC) &_BayesMallows_abind, 2},
-    {"_BayesMallows_all_topological_sorts", (DL_FUNC) &_BayesMallows_all_topological_sorts, 3},
+    {"_BayesMallows_all_topological_sorts", (DL_FUNC) &_BayesMallows_all_topological_sorts, 4},
     {"_BayesMallows_asymptotic_partition_function", (DL_FUNC) &_BayesMallows_asymptotic_partition_function, 6},
     {"_BayesMallows_get_rank_distance", (DL_FUNC) &_BayesMallows_get_rank_distance, 3},
     {"_BayesMallows_compute_importance_sampling_estimate", (DL_FUNC) &_BayesMallows_compute_importance_sampling_estimate, 4},

diff --git a/src/all_topological_sorts.cpp b/src/all_topological_sorts.cpp
@@ -13,19 +13,20 @@ class Graph {
   int n_items;
   list<int> *adj;
   vector<int> indegree;
-  void alltopologicalSortUtil(vector<int>& res, bool visited[]);
+  void alltopologicalSortUtil(vector<int>& res, vector<bool>& visited);
   int maxit;
-  int iter{};
+  bool save;
 
 public:
-  Graph(int n_items, int maxit);
+  Graph(int n_items, int maxit, bool save);
   void addEdge(int v, int w);
   void alltopologicalSort();
-  std::vector<std::vector<int>> m;
+  vector<vector<int>> m;
+  int iter{};
 };
 
-Graph::Graph(int n_items, int maxit) : n_items { n_items },
-  maxit { maxit } {
+Graph::Graph(int n_items, int maxit, bool save) : n_items { n_items },
+  maxit { maxit }, save { save } {
   adj = new list<int>[n_items];
   for (int i = 0; i < n_items; i++) indegree.push_back(0);
 }
@@ -35,10 +36,11 @@ void Graph::addEdge(int v, int w) {
   indegree[w]++;
 }
 
-void Graph::alltopologicalSortUtil(vector<int>& res, bool visited[]) {
+void Graph::alltopologicalSortUtil(vector<int>& res, vector<bool>& visited) {
   bool flag = false;
+  Rcpp::IntegerVector visit_order = Rcpp::sample(n_items, n_items) - 1;
 
-  for (int i = 0; i < n_items; i++) {
+  for (int i : visit_order) {
     if (indegree[i] == 0 && !visited[i]) {
       list<int>:: iterator j;
       for (j = adj[i].begin(); j != adj[i].end(); j++)
@@ -63,33 +65,40 @@ void Graph::alltopologicalSortUtil(vector<int>& res, bool visited[]) {
 
   if (!flag){
     iter++;
-    m.push_back(res);
+    if(save) {
+      m.push_back(res);
+    }
   }
 }
 
 void Graph::alltopologicalSort() {
-  bool *visited = new bool[n_items];
-  for (int i = 0; i < n_items; i++)
-    visited[i] = false;
-
+  vector<bool> visited;
+  visited.resize(n_items);
+  fill(visited.begin(), visited.end(), false);
   vector<int> res;
   alltopologicalSortUtil(res, visited);
 }
 
 // [[Rcpp::export]]
-arma::imat all_topological_sorts(arma::imat prefs, int n_items, int maxit = 1000) {
-  Graph g(n_items, maxit);
+arma::imat all_topological_sorts(arma::imat prefs, int n_items, int maxit,
+                                 bool save) {
+  Graph g(n_items, maxit, save);
   for(size_t i{}; i < prefs.n_rows; i++) {
     g.addEdge(prefs.at(i, 1) - 1, prefs.at(i, 0) - 1);
   }
   g.alltopologicalSort();
 
-  arma::imat m(g.m.size(), n_items);
-  for(size_t i{}; i < m.n_rows; i++) {
-    for(size_t j{}; j < m.n_cols; j++) {
-      m(i, j) = g.m[i][j] + 1;
+  if(save) {
+    arma::imat m(g.m.size(), n_items);
+    for(size_t i{}; i < m.n_rows; i++) {
+      for(size_t j{}; j < m.n_cols; j++) {
+        m(i, j) = g.m[i][j] + 1;
+      }
     }
+    return m;
+  } else {
+    arma::imat m(1, 1);
+    m(0, 0) = g.iter;
+    return m;
   }
-
-  return m;
 }
diff --git a/src/all_topological_sorts.h b/src/all_topological_sorts.h
@@ -1,2 +1,3 @@
 #pragma once
-arma::imat all_topological_sorts(arma::imat prefs, int n_items, int maxit);
+arma::imat all_topological_sorts(arma::imat prefs, int n_items, int maxit = 1000,
+                                 bool save = true);
diff --git a/tests/testthat/test-plot_top_k.R b/tests/testthat/test-plot_top_k.R
@@ -8,7 +8,7 @@ test_that("predict_top_k works", {
   )
 
   ptk <- predict_top_k(model_fit)
-  expect_equal(ptk$prob[[900]], .06)
+  expect_equal(ptk$prob[[900]], .12)
   expect_equal(dim(ptk), c(900, 3))
 
   p <- plot_top_k(model_fit)

diff --git a/tests/testthat/test-smc_pairwise.R b/tests/testthat/test-smc_pairwise.R
@@ -21,8 +21,8 @@ test_that("update_mallows works with pairwise preferences", {
     )
   }
 
-  expect_equal(mean(mod$alpha_samples), 3.99645235790935)
-  expect_equal(sd(mod$alpha_samples), 0.872720087078698)
+  expect_equal(mean(mod$alpha_samples), 3.23002172007725)
+  expect_equal(sd(mod$alpha_samples), 0.687078638322223)
 
   mod <- mod_init
   for (i in 23:24) {
@@ -36,8 +36,8 @@ test_that("update_mallows works with pairwise preferences", {
     )
   }
 
-  expect_equal(mean(mod$alpha_samples), 3.03458072249661)
-  expect_equal(sd(mod$alpha_samples), 0.670792690537946)
+  expect_equal(mean(mod$alpha_samples), 2.87132006142477)
+  expect_equal(sd(mod$alpha_samples), 0.581863142256992)
 })
 
 test_that("update_mallows works with existing users updating their data", {
@@ -58,12 +58,12 @@ test_that("update_mallows works with existing users updating their data", {
 
   expect_equal(
     mean(m1$alpha_samples),
-    2.75891148770247
+    1.80598634638498
   )
 
   expect_equal(
     order(apply(m1$augmented_rankings, 1, mean)),
-    c(1L, 6L, 9L, 3L, 7L, 4L, 10L, 11L, 8L, 12L, 15L, 13L, 14L, 2L, 5L)
+    c(15L, 6L, 2L, 3L, 12L, 11L, 9L, 13L, 10L, 14L, 1L, 8L, 5L, 7L, 4L)
   )
 
   m2 <- update_mallows(
@@ -76,12 +76,12 @@ test_that("update_mallows works with existing users updating their data", {
 
   expect_equal(
     mean(m2$alpha_samples),
-    2.79625839950885
+    2.01294805363435
   )
 
   expect_equal(
     order(apply(m2$augmented_rankings, 1, mean)),
-    c(6L, 4L, 10L, 1L, 2L, 7L, 8L, 11L, 5L, 3L, 12L, 9L, 13L, 15L, 14L)
+    c(6L, 11L, 2L, 8L, 4L, 12L, 10L, 14L, 15L, 3L, 1L, 9L, 13L, 7L, 5L)
   )
 })