Added becomes_ta()

marberts · Feb 21, 2024 · 0b3e743 · 0b3e743
1 parent 6cd57c3
commit 0b3e743
Show file tree

Hide file tree

Showing 7 changed files with 97 additions and 15 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: sps
 Title: Sequential Poisson Sampling
-Version: 0.5.3.9007
+Version: 0.5.3.9008
 Authors@R: c(
     person("Steve", "Martin",
             role = c("aut", "cre", "cph"),
@@ -30,4 +30,4 @@ URL: https://marberts.github.io/sps/, https://github.com/marberts/sps
 BugReports: https://github.com/marberts/sps/issues
 VignetteBuilder: knitr
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,7 @@ S3method(print,sps_sample)
 S3method(print,sps_sample_summary)
 S3method(summary,sps_sample)
 S3method(weights,sps_sample)
+export(becomes_ta)
 export(expected_coverage)
 export(inclusion_prob)
 export(order_sampling)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,8 @@
 ## Changes in version 0.5.4
 
+- Added `becomes_ta()` to determine the sample size when a unit enter the
+take-all stratum.
+
 - Internal changes to the way classes are instantiated. No user-visible changes.
 
 - Updated maintainer email.

diff --git a/R/inclusion_prob.R b/R/inclusion_prob.R
@@ -121,22 +121,36 @@ inclusion_prob_ <- function(x, n, strata, alpha, cutoff) {
 #' and assigning these units an inclusion probability of 1, with the remaining
 #' inclusion probabilities recalculated at each step. If \eqn{\alpha > 0}, then
 #' any ties among units with the same size are broken by their position.
+#' 
+#' The `becomes_ta()` function reverses this operations and finds the critical
+#' sample size at which a unit enters the take-all stratum. This value is
+#' undefined for units that are always included in the sample (because their 
+#' size exceeds `cutoff`) or never included.
 #'
 #' @inheritParams sps
 #'
 #' @returns
-#' A numeric vector of inclusion probabilities for each unit in the population.
+#' `inclusion_prob()` returns a numeric vector of inclusion probabilities for
+#' each unit in the population.
+#' 
+#' `becomes_ta()` returns an integer vector giving the sample size at which a
+#' unit enters the take-all stratum.
 #'
 #' @seealso
 #' [sps()] for drawing a sequential Poisson sample.
 #'
 #' @examples
-#' # Make a population with units of different size
+#' # Make inclusion probabilities for a population with units
+#' # of different size
 #' x <- c(1:10, 100)
+#' (pi <- inclusion_prob(x, 5))
+#' 
+#' # The last unit is sufficiently large to be included in all
+#' # samples with two or more units
+#' becomes_ta(x)
 #'
 #' # Use the inclusion probabilities to calculate the variance of the
 #' # sample size for Poisson sampling
-#' pi <- inclusion_prob(x, 5)
 #' sum(pi * (1 - pi))
 #'
 #' @export
@@ -152,3 +166,26 @@ inclusion_prob <- function(x,
   cutoff <- as.numeric(cutoff)
   unsplit(inclusion_prob_(x, n, strata, alpha, cutoff), strata)
 }
+
+#' Sample size when a unit becomes take-all
+#' @rdname inclusion_prob
+#' @export
+becomes_ta <- function(x, alpha = 1e-3, cutoff = Inf) {
+  if (any(x < 0)) {
+    stop("sizes must be greater than or equal to 0")
+  }
+  if (alpha < 0 || alpha > 1) {
+    stop("'alpha' must be between 0 and 1")
+  }
+  if (cutoff <= 0) {
+    stop("'cutoff' must be greater than 0")
+  }
+
+  ta <- which(x >= cutoff)
+  x[ta] <- 0
+  ord <- rev(order(x, decreasing = TRUE))
+  x <- x[ord]
+  res <- pmax.int(ceiling(cumsum(x) / x * (1 - alpha)), 1) +
+    length(x) - seq_along(x) + length(ta)
+  res[order(ord)]
+}
diff --git a/man/inclusion_prob.Rd b/man/inclusion_prob.Rd
diff --git a/tests/testthat/test-inclusion_prob.R b/tests/testthat/test-inclusion_prob.R
@@ -29,6 +29,11 @@ test_that("corner cases work as expected", {
     inclusion_prob(c(0, 1, 1, 1 - 1e-4), 3),
     c(0, 1, 1, 1)
   )
+
+  expect_equal(becomes_ta(0), NaN)
+  expect_equal(becomes_ta(1), 1)
+  expect_equal(becomes_ta(1:3, cutoff = 1), c(NaN, NaN, NaN))
+  expect_equal(becomes_ta(c(1, 3, 2, 3), alpha = 1), c(4, 1, 3, 2))
 })
 
 test_that("argument checking works", {
@@ -54,6 +59,10 @@ test_that("argument checking works", {
   expect_error(inclusion_prob(1:6, 2, cutoff = 1:3))
   expect_error(inclusion_prob(1:6, 2, cutoff = 0))
   expect_error(inclusion_prob(1:6, 2, cutoff = NA))
+
+  expect_error(becomes_ta(c(1, 2, -1)))
+  expect_error(becomes_ta(1:5, 1.1))
+  expect_error(becomes_ta(1:5, 0.5, -1))
 })
 
 test_that("inclusion probs are correct with different rounds of TA removal", {
@@ -184,3 +193,25 @@ test_that("cutoff agrees with alpha", {
   expect_equal(inclusion_prob(x, 3, alpha = 0.625, cutoff = 3),
                inclusion_prob(x, 3, cutoff = 3))
 })
+
+test_that("units become TA when expected", {
+  x <- c(6, 4, 3, 4, 2, 1, 4, 2, 2, 1, 2)
+
+  for (a in seq(0, 1, 0.05)) {
+    ta <- becomes_ta(x, a, cutoff = 6)
+    res <- apply(
+      sapply(seq_along(x), \(n) inclusion_prob(x, n, alpha = a, cutoff = 6) == 1),
+      1, which.max
+    )
+    expect_equal(ta[-1], res[-1])
+  }
+})
+
+test_that("adding a cutoff just offsets when a unit becomes TA", {
+  x <- c(5, 3, 2, 4, 3)
+  expect_equal(becomes_ta(x, 0.25), c(3, 4, 5, 4, 5))
+  expect_equal(becomes_ta(x, 0.25, 4), c(NaN, 4, 5, NaN, 5))
+
+  x <- c(10, 10, 10, x)
+  expect_equal(becomes_ta(x, 0.25, 6), c(NaN, NaN, NaN, 6, 7, 8, 7, 8))
+})
diff --git a/vignettes/sps.Rmd b/vignettes/sps.Rmd
@@ -153,13 +153,6 @@ all(sample %in% sample_tu)
 As with any proportional-to-size sampling scheme, there is a critical sample size after which some units become take-all units. If these units are not already in the sample then they can "bump" previously sampled units, requiring a larger sample size to keep all the previously sampled units in the new sample. This can be seen by finding the sample size at which each unit enters the take-all stratum.
 
 ```{r critical}
-becomes_ta <- function(x, alpha = 1e-3) {
-  ord <- rev(order(x, decreasing = TRUE))
-  x <- x[ord]
-  res <- ceiling(cumsum(x) / x * (1 - alpha)) + length(x) - seq_along(x)
-  res[order(ord)]
-}
-
 Map(\(x) head(becomes_ta(x)), split(frame$revenue, frame$region))
 ```