A lot of changes #397

Merged (78 commits) on Feb 28, 2024
Commits
f93b77b
bookkeeping
osorensen Jan 19, 2024
f9f55db
removed unnecessary call
osorensen Jan 19, 2024
84a2230
marking as const
osorensen Jan 19, 2024
7959b6c
adding more consts
osorensen Jan 19, 2024
2a639d6
additional safety measures
osorensen Jan 19, 2024
7391954
parallelization working (#355)
osorensen Jan 19, 2024
a8bdc0d
correcting an error in docs
osorensen Jan 19, 2024
2d69ada
Merge branch 'develop' of https://github.com/ocbe-uio/BayesMallows in…
osorensen Jan 19, 2024
83248f6
update cran-comments
osorensen Jan 24, 2024
506d5c4
updating comments
osorensen Jan 24, 2024
a5d84e4
Merge branch 'master' into develop
osorensen Jan 26, 2024
d25a0f2
updating news
osorensen Jan 26, 2024
243b8a3
deleting submission file
osorensen Jan 26, 2024
16a93ba
Adding support for sampling from prior (#360)
osorensen Jan 26, 2024
eddbd22
Merge branch 'develop' of https://github.com/ocbe-uio/BayesMallows in…
osorensen Jan 26, 2024
9b7d4dc
fixed error in update_mallows.SMCMallows
osorensen Jan 26, 2024
236a21d
updated failing test and added namespace qualifier
osorensen Jan 26, 2024
144a451
added long-running SMC test from prior
osorensen Jan 26, 2024
a62545e
Take care of item names properly (#363)
osorensen Jan 26, 2024
ec9500c
Can now deal with a single vector of input data (#364)
osorensen Jan 26, 2024
db8a31b
updating news
osorensen Jan 26, 2024
78e4f29
updated set_priors function
osorensen Feb 7, 2024
86723fc
closes #370
osorensen Feb 7, 2024
2a61f42
Added a gamma prior (#371)
osorensen Feb 7, 2024
45e1eeb
Merge branch 'priors-issue-370' of https://github.com/ocbe-uio/BayesM…
osorensen Feb 7, 2024
9f4cc7f
Had forgot to implement the change... (#372)
osorensen Feb 7, 2024
defa9db
Merge branch 'develop' of https://github.com/ocbe-uio/BayesMallows in…
osorensen Feb 8, 2024
41cb4ab
Added lag option (#373)
osorensen Feb 8, 2024
9fddacd
ready for the change
osorensen Feb 8, 2024
ee08f70
Merge branch 'develop' of https://github.com/ocbe-uio/BayesMallows in…
osorensen Feb 8, 2024
701bf6b
Resampling issue 365 (#376)
osorensen Feb 9, 2024
bd74eb5
resolving conflict and bumping dev version
osorensen Feb 9, 2024
0747676
removed git conflict marker
osorensen Feb 9, 2024
ce0128a
added a line shift
osorensen Feb 15, 2024
d3bbc55
removing const-ref from built-in types
osorensen Feb 15, 2024
0af8731
refactoring limits functions for pairwise augmentation
osorensen Feb 15, 2024
7c55d63
increasing test strictness
osorensen Feb 15, 2024
ea86e61
added some more work
osorensen Feb 16, 2024
30d7774
moving distance code into implementation file
osorensen Feb 16, 2024
2d71e7c
moved partition function code into cpp files
osorensen Feb 16, 2024
4395859
added code for reproducing Liu et al 2019 review
osorensen Feb 16, 2024
4f2608f
changed updating frequency for pkgdown. closes #380
osorensen Feb 16, 2024
a80b0d6
adding ignore to codecov
osorensen Feb 16, 2024
2fe90c2
fixing #381 (#382)
osorensen Feb 16, 2024
9924131
Heatplot issue 381 (#383)
osorensen Feb 16, 2024
84e5671
updated news
osorensen Feb 16, 2024
c9cf154
fixing conflict
osorensen Feb 16, 2024
7a0593d
resolving conflict
osorensen Feb 19, 2024
eb10ac8
updated tests
osorensen Feb 20, 2024
fe4c937
Swap issue 368 (#384)
osorensen Feb 21, 2024
a0b03cd
corrected typo in docs
osorensen Feb 22, 2024
b1a5693
refactor
osorensen Feb 22, 2024
3a4f470
removed unused argument
osorensen Feb 22, 2024
e8d0f0f
changed argument order
osorensen Feb 22, 2024
5af0249
moved metric into the parameters class
osorensen Feb 22, 2024
11ecd7f
fixed a bug
osorensen Feb 22, 2024
b196f54
simplifying the tidy function
osorensen Feb 23, 2024
ad96fcd
simplified n_assessors argument
osorensen Feb 23, 2024
212ed23
removed observation_frequency from cpp return values
osorensen Feb 23, 2024
e4ba5b6
moved any_missing to setup_rank_data
osorensen Feb 23, 2024
d22834a
moving augpair to setup_rank_data
osorensen Feb 23, 2024
8c5b28f
added test for burnin
osorensen Feb 23, 2024
39e9a5c
some fixes
osorensen Feb 23, 2024
1b42c5b
updated news
osorensen Feb 23, 2024
f79c738
rebuilt smc vignette
osorensen Feb 23, 2024
8e47867
recompiled readme
osorensen Feb 23, 2024
b244710
Function for sequential learning (#391)
osorensen Feb 27, 2024
60ab54a
harmonizing pairwise and partial
osorensen Feb 27, 2024
3d7757d
Merge branch 'develop' of https://github.com/ocbe-uio/BayesMallows in…
osorensen Feb 27, 2024
a2600d1
Burnin issue 394 (#396)
osorensen Feb 28, 2024
35dd003
incremented dev version
osorensen Feb 28, 2024
190d85d
updated example
osorensen Feb 28, 2024
e037e7d
styling
osorensen Feb 28, 2024
eabbf2f
styling
osorensen Feb 28, 2024
8d47802
pleasing CodeFactor
osorensen Feb 28, 2024
fd8441f
fixed bug in acceptance rate for compute_mallows_sequentially
osorensen Feb 28, 2024
7187759
not linting examples
osorensen Feb 28, 2024
f19be0f
small improvement of docs to plot_elbow
osorensen Feb 28, 2024
14 changes: 1 addition & 13 deletions .github/workflows/linter.yml
@@ -41,19 +41,7 @@ jobs:
run: |
library(lintr)
excluded_files <- list(
"inst/examples/compute_consensus_example.R",
"inst/examples/compute_mallows_example.R",
"inst/examples/compute_mallows_mixtures_example.R",
"inst/examples/compute_posterior_intervals_example.R",
"inst/examples/estimate_partition_function_example.R",
"inst/examples/generate_constraints_example.R",
"inst/examples/generate_initial_ranking_example.R",
"inst/examples/generate_transitive_closure_example.R",
"inst/examples/label_switching_example.R",
"inst/examples/obs_freq_example.R",
"inst/examples/plot_top_k_example.R",
"inst/examples/plot.BayesMallows_example.R",
"inst/examples/sample_mallows_example.R",
"inst",
"data-raw",
"tests/testthat.R",
"tests",
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: BayesMallows
Type: Package
Title: Bayesian Preference Learning with the Mallows Rank Model
Version: 2.0.1.9003
Version: 2.0.1.9006
Authors@R: c(person("Oystein", "Sorensen",
email = "oystein.sorensen.1985@gmail.com",
role = c("aut", "cre"),
11 changes: 11 additions & 0 deletions NAMESPACE
@@ -1,13 +1,20 @@
# Generated by roxygen2: do not edit by hand

S3method("burnin<-",BayesMallows)
S3method("burnin<-",BayesMallowsMixtures)
S3method(assess_convergence,BayesMallows)
S3method(assess_convergence,BayesMallowsMixtures)
S3method(burnin,BayesMallows)
S3method(burnin,BayesMallowsMixtures)
S3method(burnin,SMCMallows)
S3method(compute_consensus,BayesMallows)
S3method(compute_consensus,SMCMallows)
S3method(compute_posterior_intervals,BayesMallows)
S3method(compute_posterior_intervals,SMCMallows)
S3method(generate_initial_ranking,BayesMallowsIntransitive)
S3method(generate_initial_ranking,BayesMallowsTransitiveClosure)
S3method(get_acceptance_ratios,BayesMallows)
S3method(get_acceptance_ratios,SMCMallows)
S3method(plot,BayesMallows)
S3method(plot,SMCMallows)
S3method(print,BayesMallows)
@@ -16,18 +23,22 @@ S3method(print,SMCMallows)
S3method(update_mallows,BayesMallows)
S3method(update_mallows,BayesMallowsPriorSamples)
S3method(update_mallows,SMCMallows)
export("burnin<-")
export(assess_convergence)
export(assign_cluster)
export(burnin)
export(compute_consensus)
export(compute_expected_distance)
export(compute_mallows)
export(compute_mallows_mixtures)
export(compute_mallows_sequentially)
export(compute_observation_frequency)
export(compute_posterior_intervals)
export(compute_rank_distance)
export(create_ordering)
export(create_ranking)
export(estimate_partition_function)
export(get_acceptance_ratios)
export(get_cardinalities)
export(get_mallows_loglik)
export(get_transitive_closure)
23 changes: 19 additions & 4 deletions NEWS.md
@@ -1,5 +1,18 @@
# BayesMallows (development versions)

* Acceptance ratios are now tracked both in the Metropolis-Hastings algorithm
used by compute_mallows() and in the move step inside the sequential Monte
Carlo algorithm used by update_mallows() and compute_mallows_sequentially().
Use the function get_acceptance_ratios() to access them.
* BREAKING CHANGE: Burnin now has to be explicitly set using
'burnin(model) <- value' if it is not already set in compute_options. This
alleviates the need for a 'burnin' argument in the functions for assessing the
posterior distribution and it abstracts away the implementation from the user.
See '?burnin' and '?burnin<-' for details.
* The swap proposal defined in Crispino et al., Annals of Applied Statistics
(2019) is now an option for proposing the modal ranking rho. It can be
defined by setting rho_proposal="swap" in set_compute_options(). The leap-and-
shift distribution is still the default.
* Fixed a bug in heat_plot() when the model has been estimated with
rho_thinning > 1, causing the probabilities to be unnormalized. Issue #381.
Thanks to Marta Crispino for discovering the bug.
@@ -10,10 +23,12 @@
sampling of latent ranks, specified in the "latent_sampling_lag" argument
to set_smc_options().
* Prior for precision parameter alpha is now a gamma distribution. Until now
an exponential distribution has been assumed. Since the exponential is a special
case of the gamma with shape parameter equal to 1 (the default), this is not
a breaking change. However, it adds flexibility when it comes to specifying the prior.
* setup_rank_data() now accepts a single vector of rankings, silently converting a to matrix with a single row.
an exponential distribution has been assumed. Since the exponential is a
special case of the gamma with shape parameter equal to 1 (the default), this
is not a breaking change. However, it adds flexibility when it comes to
specifying the prior.
* setup_rank_data() now accepts a single vector of rankings, silently converting
a vector to matrix with a single row.
* Sequential Monte Carlo algorithm can now start from a sample from the prior
distribution, see the sample_prior() function for an example.
* Added support for parallelism under-the-hood with oneTBB.
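
As a reading aid for the news entries above, a minimal R sketch of the new user-facing workflow. It assumes the package's bundled potato_visual ranking data and the rankings argument of setup_rank_data(); treat it as an illustration, not an excerpt from the package documentation.

library(BayesMallows)

# Swap proposal for the modal ranking rho (leap-and-shift remains the default)
fit <- compute_mallows(
  data = setup_rank_data(rankings = potato_visual),
  compute_options = set_compute_options(nmc = 2000, rho_proposal = "swap")
)

# Burnin must now be set explicitly before posterior summaries are computed
burnin(fit) <- 500

# Acceptance ratios are tracked and can be extracted afterwards
get_acceptance_ratios(fit)

# A single vector of rankings is now accepted and treated as a one-row matrix
single_ranking <- setup_rank_data(c(1, 3, 2, 5, 4))
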
33 changes: 33 additions & 0 deletions R/acceptance_ratio.R
@@ -0,0 +1,33 @@
#' @title Get Acceptance Ratios
#' @description Extract acceptance ratio from Metropolis-Hastings
#' algorithm used by [compute_mallows()] or by the move step in
#' [update_mallows()] and [compute_mallows_sequentially()]. Currently the
#' function only returns the values, but it will be refined in the future. If
#' burnin is not set in the call to [compute_mallows()], the acceptance ratio
#' for all iterations will be reported. Otherwise the post burnin acceptance
#' ratio is reported. For the SMC method the acceptance ratios apply to all
#'   iterations, since no burnin is needed here.
#'
#' @param model_fit A model fit.
#' @param ... Other arguments passed on to other methods. Currently not used.
#'
#' @export
#' @example /inst/examples/get_acceptance_ratios_example.R
#'
#' @family posterior quantities
#'
get_acceptance_ratios <- function(model_fit, ...) {
UseMethod("get_acceptance_ratios")
}

#' @export
#' @rdname get_acceptance_ratios
get_acceptance_ratios.BayesMallows <- function(model_fit, ...) {
model_fit$acceptance_ratios
}

#' @export
#' @rdname get_acceptance_ratios
get_acceptance_ratios.SMCMallows <- function(model_fit, ...) {
model_fit$acceptance_ratios
}
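
A short, hedged usage sketch for the new accessor; the data set and option values are illustrative and are not the contents of the example file referenced above.

# Setting burnin in compute_options makes the reported ratios post-burnin
model_fit <- compute_mallows(
  data = setup_rank_data(rankings = potato_visual),
  compute_options = set_compute_options(nmc = 2000, burnin = 500)
)
get_acceptance_ratios(model_fit)
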
34 changes: 17 additions & 17 deletions R/assess_convergence.R
@@ -13,7 +13,7 @@
#' `"theta"`.
#'
#' @param items The items to study in the diagnostic plot for `rho`. Either a
#' vector of item names, corresponding to `model_fit$items` or a vector of
#' vector of item names, corresponding to `model_fit$data$items` or a vector of
#' indices. If NULL, five items are selected randomly. Only used when
#' `parameter = "rho"` or `parameter = "Rtilde"`.
#'
@@ -109,23 +109,23 @@ trace_alpha <- function(m, clusters) {
}

trace_rho <- function(model_fit, items, clusters = model_fit$n_clusters > 1) {
if (is.null(items) && model_fit$n_items > 5) {
if (is.null(items) && model_fit$data$n_items > 5) {
message("Items not provided by user. Picking 5 at random.")
items <- sample.int(model_fit$n_items, 5)
} else if (is.null(items) && model_fit$n_items > 0) {
items <- seq.int(from = 1, to = model_fit$n_items)
items <- sample.int(model_fit$data$n_items, 5)
} else if (is.null(items) && model_fit$data$n_items > 0) {
items <- seq.int(from = 1, to = model_fit$data$n_items)
} else if (!is.null(items)) {
if (is.numeric(items) &&
length(setdiff(items, seq_len(model_fit$n_item))) > 0) {
length(setdiff(items, seq_len(model_fit$data$n_items))) > 0) {
stop("numeric items vector must contain indices between 1 and the number of items")
}
if (is.character(items) && length(setdiff(items, model_fit$items) > 0)) {
if (is.character(items) && length(setdiff(items, model_fit$data$items) > 0)) {
stop("unknown items provided")
}
}

if (!is.character(items)) {
items <- model_fit$items[items]
items <- model_fit$data$items[items]
}

df <- model_fit$rho[model_fit$rho$item %in% items, , drop = FALSE]
@@ -157,20 +157,20 @@ trace_rtilde <- function(model_fit, items, assessors, ...) {
stop("Please rerun with compute_mallows with save_aug = TRUE")
}

if (is.null(items) && model_fit$n_items > 5) {
if (is.null(items) && model_fit$data$n_items > 5) {
message("Items not provided by user. Picking 5 at random.")
items <- sample.int(model_fit$n_items, 5)
} else if (is.null(items) && model_fit$n_items > 0) {
items <- seq.int(from = 1, to = model_fit$n_items)
items <- sample.int(model_fit$data$n_items, 5)
} else if (is.null(items) && model_fit$data$n_items > 0) {
items <- seq.int(from = 1, to = model_fit$data$n_items)
}

if (is.null(assessors) && model_fit$n_assessors > 5) {
if (is.null(assessors) && model_fit$data$n_assessors > 5) {
message("Assessors not provided by user. Picking 5 at random.")
assessors <- sample.int(model_fit$n_assessors, 5)
} else if (is.null(assessors) && model_fit$n_assessors > 0) {
assessors <- seq.int(from = 1, to = model_fit$n_assessors)
assessors <- sample.int(model_fit$data$n_assessors, 5)
} else if (is.null(assessors) && model_fit$data$n_assessors > 0) {
assessors <- seq.int(from = 1, to = model_fit$data$n_assessors)
} else if (!is.null(assessors)) {
if (length(setdiff(assessors, seq(1, model_fit$n_assessors, 1))) > 0) {
if (length(setdiff(assessors, seq(1, model_fit$data$n_assessors, 1))) > 0) {
stop("assessors vector must contain numeric indices between 1 and the number of assessors")
}
}
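
The metadata used by these trace functions now lives under model_fit$data. A hedged sketch of the public entry point; the item indices and data set are illustrative.

fit <- compute_mallows(data = setup_rank_data(rankings = potato_visual))
# Items may be given as indices into model_fit$data$items or as item names
assess_convergence(fit, parameter = "rho", items = c(1, 3, 5))
assess_convergence(fit, parameter = "alpha")
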
13 changes: 4 additions & 9 deletions R/assign_cluster.R
@@ -6,10 +6,6 @@
#' @param model_fit An object of type `BayesMallows`, returned from
#' [compute_mallows()].
#'
#' @param burnin A numeric value specifying the number of iterations to discard
#' as burn-in. Defaults to `model_fit$burnin`, and must be provided if
#' `model_fit$burnin` does not exist. See [assess_convergence()].
#'
#' @param soft A logical specifying whether to perform soft or hard clustering.
#' If `soft=TRUE`, all cluster probabilities are returned, whereas if
#' `soft=FALSE`, only the maximum a posterior (MAP) cluster probability is
@@ -42,14 +38,13 @@
#' head(assign_cluster(mixture_model, soft = FALSE))
#'
assign_cluster <- function(
model_fit, burnin = model_fit$burnin, soft = TRUE, expand = FALSE) {
if (is.null(burnin)) {
stop("Please specify the burnin.")
model_fit, soft = TRUE, expand = FALSE) {
if (is.null(burnin(model_fit))) {
stop("Please specify the burnin with 'burnin(model_fit) <- value'.")
}
stopifnot(burnin < model_fit$nmc)

df <- model_fit$cluster_assignment[
model_fit$cluster_assignment$iteration > burnin, ,
model_fit$cluster_assignment$iteration > burnin(model_fit), ,
drop = FALSE
]

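
Because the burnin argument is gone from assign_cluster(), existing calls need a small migration. A hedged sketch, assuming the package's sushi_rankings data and that set_model_options() takes an n_clusters argument; the cluster count and burnin value are illustrative.

mixture_model <- compute_mallows(
  data = setup_rank_data(rankings = sushi_rankings),
  model_options = set_model_options(n_clusters = 3)
)
# Before: assign_cluster(mixture_model, burnin = 1000, soft = FALSE)
burnin(mixture_model) <- 1000
head(assign_cluster(mixture_model, soft = FALSE))
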
82 changes: 82 additions & 0 deletions R/burnin.R
@@ -0,0 +1,82 @@
#' @title Set the burnin
#' @description Set or update the burnin of a model
#' computed using Metropolis-Hastings.
#'
#' @param model An object of class `BayesMallows` returned from
#' [compute_mallows()] or an object of class `BayesMallowsMixtures` returned
#' from [compute_mallows_mixtures()].
#' @param ... Optional arguments passed on to other methods. Currently not used.
#' @param value An integer specifying the burnin. If `model` is of class
#' `BayesMallowsMixtures`, a single value will be assumed to be the burnin
#' for each model element. Alternatively, `value` can be specified as an
#' integer vector of the same length as `model`, and hence a separate burnin
#' can be set for each number of mixture components.
#'
#' @export
#' @return An object of class `BayesMallows` with burnin set.
#'
#' @family modeling
#'
#' @example /inst/examples/burnin_example.R
#'
`burnin<-` <- function(model, ..., value) UseMethod("burnin<-")

#' @export
#' @rdname burnin-set
`burnin<-.BayesMallows` <- function(model, ..., value) {
if (inherits(model, "SMCMallows")) {
stop("Cannot set burnin for SMC model.")
}
validate_integer(value)
if (value >= model$compute_options$nmc) {
stop("Burnin cannot be larger than the number of Monte Carlo samples.")
}
# Workaround as long as we have the deprecation notice for `$<-`
class(model) <- "list"
model$compute_options$burnin <- value
class(model) <- "BayesMallows"
model
}

#' @export
#' @rdname burnin-set
`burnin<-.BayesMallowsMixtures` <- function(model, ..., value) {
for (v in value) validate_integer(v)
if (length(value) == 1) value <- rep(value, length(model))
if (length(value) != length(model)) stop("Wrong number of entries in value.")

for (i in seq_along(model)) burnin(model[[i]]) <- value[[i]]
model
}

#' @title See the burnin
#' @description
#' See the current burnin value of the model.
#'
#' @param model A model object.
#' @param ... Optional arguments passed on to other methods. Currently not used.
#'
#' @export
#' @return An integer specifying the burnin, if it exists. Otherwise `NULL`.
#'
#' @family modeling
#'
#' @example /inst/examples/burnin_example.R
#'
burnin <- function(model, ...) UseMethod("burnin")

#' @rdname burnin
#' @export
burnin.BayesMallows <- function(model, ...) {
model$compute_options$burnin
}

#' @rdname burnin
#' @export
burnin.BayesMallowsMixtures <- function(model, ...) {
lapply(model, burnin)
}

#' @rdname burnin
#' @export
burnin.SMCMallows <- function(model, ...) 0
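
A hedged sketch of the setter and getter, including the vectorized behaviour for BayesMallowsMixtures objects; the cluster counts, burnin values, and data set are illustrative.

models <- compute_mallows_mixtures(
  n_clusters = 1:3,
  data = setup_rank_data(rankings = potato_visual)
)
# A single value is recycled across all mixture fits ...
burnin(models) <- 500
# ... or supply one burnin per number of mixture components
burnin(models) <- c(500, 500, 1000)
burnin(models)  # returns a list with one burnin per element
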
18 changes: 8 additions & 10 deletions R/compute_consensus.R
@@ -8,9 +8,6 @@
#' @param model_fit A model fit.
#' @param type Character string specifying which consensus to compute. Either
#' `"CP"` or `"MAP"`. Defaults to `"CP"`.
#' @param burnin A numeric value specifying the number of iterations to discard
#' as burn-in. Defaults to `model_fit$burnin`, and must be provided if
#' `model_fit$burnin` does not exist. See [assess_convergence()].
#' @param parameter Character string defining the parameter for which to compute
#' the consensus. Defaults to `"rho"`. Available options are `"rho"` and
#' `"Rtilde"`, with the latter giving consensus rankings for augmented ranks.
@@ -32,10 +29,11 @@ compute_consensus <- function(model_fit, ...) {
#' @export
#' @rdname compute_consensus
compute_consensus.BayesMallows <- function(
model_fit, type = c("CP", "MAP"), burnin = model_fit$burnin,
model_fit, type = c("CP", "MAP"),
parameter = c("rho", "Rtilde"), assessors = 1L, ...) {
if (is.null(burnin)) stop("Please specify the burnin.")
stopifnot(burnin < model_fit$nmc)
if (is.null(burnin(model_fit))) {
stop("Please specify the burnin with 'burnin(model_fit) <- value'.")
}
type <- match.arg(type, c("CP", "MAP"))
parameter <- match.arg(parameter, c("rho", "Rtilde"))

@@ -45,15 +43,15 @@ compute_consensus.BayesMallows <- function(
}

if (parameter == "rho") {
df <- model_fit$rho[model_fit$rho$iteration > burnin, , drop = FALSE]
df <- model_fit$rho[model_fit$rho$iteration > burnin(model_fit), , drop = FALSE]
if (type == "CP") {
df <- cpc_bm(df)
} else if (type == "MAP") {
df <- cpm_bm(df)
}
} else if (parameter == "Rtilde") {
df <- model_fit$augmented_data[
model_fit$augmented_data$iteration > burnin &
model_fit$augmented_data$iteration > burnin(model_fit) &
model_fit$augmented_data$assessor %in% assessors, ,
drop = FALSE
]
@@ -83,8 +81,8 @@ compute_consensus.BayesMallows <- function(
compute_consensus.SMCMallows <- function(
model_fit, type = c("CP", "MAP"), parameter = "rho", ...) {
parameter <- match.arg(parameter, "rho")
model_fit$burnin <- 0
model_fit$nmc <- model_fit$n_particles
model_fit$compute_options$burnin <- 0
model_fit$compute_options$nmc <- model_fit$n_particles
NextMethod("compute_consensus")
}

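
A hedged sketch of consensus computation under the new burnin handling; the data set and burnin value are illustrative. For SMCMallows objects returned by update_mallows(), burnin is fixed at zero internally, so no setter call is needed.

fit <- compute_mallows(data = setup_rank_data(rankings = potato_visual))
burnin(fit) <- 500
compute_consensus(fit, type = "CP")
compute_consensus(fit, type = "MAP", parameter = "rho")
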
4 changes: 3 additions & 1 deletion R/compute_mallows.R
@@ -82,7 +82,9 @@ compute_mallows <- function(
validate_rankings(data)
validate_initial_values(initial_values, data)

pfun_values <- extract_pfun_values(model_options, data, pfun_estimate)
pfun_values <- extract_pfun_values(
model_options$metric, data$n_items, pfun_estimate
)

if (is.null(cl)) {
lapplyfun <- lapply