Skip to content

Commit

Permalink
Merge pull request #2 from marberts/generic-matrix
Browse files Browse the repository at this point in the history
Added match_first, closing #1
  • Loading branch information
marberts authored Nov 22, 2024
2 parents d95d3c5 + 3421558 commit 9f3378b
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 32 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rsmatrix
Title: Matrices for Repeat-Sales Price Indexes
Version: 0.2.8.9001
Version: 0.2.8.9002
Authors@R: c(
person(given = "Steve", family = "Martin", role = c("aut", "cre", "cph"),
email = "marberts@protonmail.com",
Expand All @@ -23,5 +23,5 @@ URL: https://marberts.github.io/rsmatrix/, https://github.com/marberts/rsmatrix
BugReports: https://github.com/marberts/rsmatrix/issues
Config/testthat/edition: 3
VignetteBuilder: knitr
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
14 changes: 10 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

- Updated maintainer email.

- `rs_pairs()` gets a new argument `match_first` to control if products in the
first period match to themselves (#1).

## Changes in version 0.2.8

- Added a vignette.
Expand All @@ -14,17 +17,20 @@

## Changes in version 0.2.3

- Making the "Y" vector with rs_matrix() no longer gives an error with length-0 inputs and a factor with non-empty levels.
- Making the `"Y"` vector with `rs_matrix()` no longer gives an error with
length-0 inputs and a factor with non-empty levels.

- rs_matrix() cleans up the enclosing environment of its result.
- `rs_matrix()` cleans up the enclosing environment of its result.

## Changes in version 0.2.1

- rs_pairs() and rs_matrix() are now faster, and less picky about their inputs for time periods.
- `rs_pairs()` and `rs_matrix()` are now faster, and less picky about their
inputs for time periods.

## Changes in version 0.2.0

- rs_pairs() has been reworked to be much faster and more general, while rs_unpair() has been removed. These changes are not backwards compatible.
- `rs_pairs()` has been reworked to be much faster and more general,
while `rs_unpair()` has been removed. These changes are not backwards compatible.

- Added French translations.

Expand Down
33 changes: 20 additions & 13 deletions R/rs_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ different_lengths <- function(...) {
#' Compute the Z matrix (internal)
#' @noRd
rs_z_ <- function(t2, t1, f = NULL, sparse = FALSE) {
# coerce t2 and t1 into characters prior to taking the union
# so that both dates and factors are treated the same
lev2 <- as.character(unique(t2))
lev1 <- as.character(unique(t1))
# t2 and t1 are coerced into characters prior to taking the union
# so that both dates and factors are treated the same.
lev2 <- unique(as.character(t2))
lev1 <- unique(as.character(t1))
lev <- sort.int(unique(c(lev2, lev1))) # usually faster than base::union()
t2 <- factor(t2, lev)
t1 <- factor(t1, lev)
Expand All @@ -22,7 +22,7 @@ rs_z_ <- function(t2, t1, f = NULL, sparse = FALSE) {
)
}

# make row names before interacting with f
# Make row names before interacting with f.
nm <- if (!is.null(names(t2))) {
names(t2)
} else if (!is.null(names(t1))) {
Expand All @@ -40,7 +40,7 @@ rs_z_ <- function(t2, t1, f = NULL, sparse = FALSE) {
lev <- levels(t2)
}

# calculate Z
# Calculate Z.
dims <- c(length(nm), length(lev))
attributes(t2) <- NULL
attributes(t1) <- NULL
Expand Down Expand Up @@ -70,7 +70,9 @@ rs_z_ <- function(t2, t1, f = NULL, sparse = FALSE) {

#' Compute X matrix (internal)
#' @noRd
rs_x_ <- function(z, p2, p1) (z > 0) * p2 - (z < 0) * p1
rs_x_ <- function(z, p2, p1) {
(z > 0) * p2 - (z < 0) * p1
}

#' Shiller's repeat-sales matrices
#'
Expand Down Expand Up @@ -165,6 +167,11 @@ rs_x_ <- function(z, p2, p1) (z > 0) * p2 - (z < 0) * p1
#'
#' @export
rs_matrix <- function(t2, t1, p2, p1, f = NULL, sparse = FALSE) {
t2 <- as.character(t2)
t1 <- as.character(t1)
p2 <- as.numeric(p2)
p1 <- as.numeric(p1)

if (is.null(f)) {
if (different_lengths(t2, t1, p2, p1)) {
stop("'t2', 't1', 'p2', and 'p1' must be the same length")
Expand All @@ -181,23 +188,23 @@ rs_matrix <- function(t2, t1, p2, p1, f = NULL, sparse = FALSE) {
stop("'t2', 't1', and 'f' cannot contain NAs")
}
}

z <- rs_z_(t2, t1, f, sparse)
p2 <- as.numeric(p2)
p1 <- as.numeric(p1)
# number of columns that need to be removed for base period
# Number of columns that need to be removed for base period.
n <- max(1L, nlevels(f)) * (ncol(z) > 0)
# return value

res <- function(matrix = c("Z", "X", "y", "Y")) {
switch(match.arg(matrix),
Z = z[, -seq_len(n), drop = FALSE],
X = rs_x_(z[, -seq_len(n), drop = FALSE], p2, p1),
y = structure(log(p2 / p1), names = rownames(z)),
# rowSums() gets the single value in the base period
# for each group
# for each group.
Y = -Matrix::rowSums(rs_x_(z[, seq_len(n), drop = FALSE], p2, p1))
)
}
# clean up enclosing environment

# Clean up enclosing environment.
enc <- list(z = z, n = n, p2 = p2, p1 = p1)
environment(res) <- list2env(enc, parent = getNamespace("rsmatrix"))
res
Expand Down
20 changes: 13 additions & 7 deletions R/rs_pairs.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
#' [order()]).
#' @param product A vector that gives the product identifier for each sale.
#' Usually a factor or vector of integer codes for each product.
#' @param match_first Should products in the first period match with
#' themselves (the default)?
#'
#' @returns
#' A numeric vector of indices giving the position of the previous sale
#' for each `product`, with the convention that the previous sale for the
#' first sale is itself. Ties are resolved according to the order they
#' appear in `period`.
#' first sale is itself if `match_first = TRUE`, `NA` otherwise. Ties are
#' resolved according to the order they appear in `period`.
#'
#' @note
#' [`order()`] is the workhorse of `rs_pairs()`, so performance can be
Expand Down Expand Up @@ -42,12 +44,12 @@
#' x
#'
#' @export
rs_pairs <- function(period, product) {
rs_pairs <- function(period, product, match_first = TRUE) {
if (length(product) != length(period)) {
stop("'period' and 'product' must be the same length")
}

# != is slow for factors with many levels, so use the integer codes
# != is slow for factors with many levels, so use the integer codes.
if (is.factor(product)) {
attributes(product) <- NULL
}
Expand All @@ -58,11 +60,15 @@ rs_pairs <- function(period, product) {
}

res <- rep.int(NA_integer_, length(period))
# offset the period by product ordering
# Offset the period by product ordering.
res[ord] <- ord[c(1L, seq_len(length(ord) - 1L))]
# the first period for each product points to the last period
# for the previous product
# The first period for each product points to the last period
# for the previous product.
first <- which(product[res] != product)
res[first] <- first

if (!match_first) {
res[period[res] == period] <- NA
}
res
}
6 changes: 3 additions & 3 deletions R/rs_var.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ rs_var <- function(u, Z, X = Z, ids = seq_len(nrow(X)), df = NULL) {
} else {
as.numeric(df)
}
# the meat
# The meat.
ug <- split.data.frame(u, ids)
Zg <- split.data.frame(Z, ids)
V <- Map(function(x, y) tcrossprod(crossprod(x, y)), Zg, ug)
V <- Reduce(`+`, V)
# the bread
# The bread.
B <- solve(crossprod(Z, X))
# put the sandwich together
# Put the sandwich together.
vcov <- tcrossprod(B %*% V, B)
df * vcov
}
9 changes: 6 additions & 3 deletions man/rs_pairs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions tests/testthat/test-rs_pairs.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ test_that("an easy example works", {
rs_pairs(x, y),
c(1L, 10L, 3L, 3L, 4L, 7L, 1L, 8L, 2L, 10L, 8L)
)

expect_equal(
rs_pairs(x, y, match_first = FALSE),
c(NA, 10, NA, 3, 4, 7, 1, NA, 2, NA, NA)
)

x[2] <- NA
expect_identical(
Expand Down Expand Up @@ -53,9 +58,14 @@ test_that("a more complex example works", {

test_that("corner cases work", {
expect_identical(rs_pairs(numeric(0), character(0)), integer(0))
expect_identical(
rs_pairs(numeric(0), character(0), match_first = FALSE),
integer(0)
)
expect_identical(rs_pairs(1:4, rep(NA, 4)), integer(0))
expect_identical(rs_pairs(rep(NA, 4), 1:4), integer(0))
expect_identical(rs_pairs(1, 1), 1L)
expect_identical(rs_pairs(1, 1, match_first = FALSE), NA_integer_)
expect_identical(rs_pairs(rep(1, 10), 1:10), 1:10)
expect_identical(rs_pairs(1:10, rep(1, 10)), c(1L, 1:9))
expect_identical(rs_pairs(c(1, 2, 3, 2), rep(1, 4)), c(1L, 1L, 4L, 2L))
Expand All @@ -68,6 +78,11 @@ test_that("sales pairs are back periods", {
rs_pairs(period, product),
c(11, 4, 10, 9, 8, 6, 1, 8, 9, 5, 6, 2)
)

expect_equal(
rs_pairs(period, product, match_first = FALSE),
c(11, 4, 10, 9, 8, NA, 1, NA, NA, 5, 6, 2)
)
})

test_that("different length inputs is an error", {
Expand Down

0 comments on commit 9f3378b

Please sign in to comment.