Skip to content

Commit

Permalink
implement impute_zero, impute_factor, and the flexible impute_fixed - r…
Browse files Browse the repository at this point in the history
…esolves #261
  • Loading branch information
njtierney committed Apr 7, 2023
1 parent 0da8ccd commit dc27246
Show file tree
Hide file tree
Showing 12 changed files with 362 additions and 0 deletions.
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ Collate:
'gg-miss-var.R'
'gg-miss-which.R'
'helpers.R'
'impute-factor.R'
'impute-fixed.R'
'impute-median.R'
'impute-zero.R'
'impute_below.R'
'impute_mean.R'
'label-miss.R'
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Generated by roxygen2: do not edit by hand

S3method(impute_factor,character)
S3method(impute_factor,default)
S3method(impute_factor,factor)
S3method(impute_factor,shade)
S3method(impute_fixed,default)
S3method(impute_mean,default)
S3method(impute_mean,factor)
S3method(impute_median,default)
Expand Down Expand Up @@ -76,6 +81,8 @@ export(impute_below)
export(impute_below_all)
export(impute_below_at)
export(impute_below_if)
export(impute_factor)
export(impute_fixed)
export(impute_mean)
export(impute_mean_all)
export(impute_mean_at)
Expand All @@ -84,6 +91,7 @@ export(impute_median)
export(impute_median_all)
export(impute_median_at)
export(impute_median_if)
export(impute_zero)
export(is_na)
export(is_shade)
export(label_miss_1d)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# naniar (development version)

## New

- implement `impute_fixed` and `impute_zero`, notably these do not implement "scoped variants" which were previously implemented - for example, `impute_fixed_if` etc. This is in favour of using the new `across` workflow within `dplyr`, and it is easier to maintain.

# naniar 1.0.0

Version 1.0.0 of naniar is to signify that this release is associated with
Expand Down
59 changes: 59 additions & 0 deletions R/impute-factor.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Impute a factor value into a vector with missing values
#'
#' For imputing fixed factor levels. It adds the new imputed value to the end
#' of the levels of the vector. We generally recommend to impute using other
#' model based approaches. See [impute_lm()] and friends.
#'
#' @param x vector
#' @param value factor to impute
#'
#' @return vector with a factor values replaced
#' @export
#' @name impute_factor
#'
#' @examples
#'
#' vec <- factor(LETTERS[1:10])
#'
#' vec[sample(1:10, 3)] <- NA
#'
#' impute_factor(vec, "wat")
#'
impute_factor <- function(x, value) UseMethod("impute_factor")

#' @export
#' @rdname impute_factor
impute_factor.default <- function(x, value){
vctrs::vec_assert(x, ptype = character())
}

#' @export
#' @rdname impute_factor
impute_factor.factor <- function(x, value){

x <- forcats::fct_expand(x, value)

x[is.na(x)] <- factor(value)

x
}

#' @export
#' @rdname impute_factor
impute_factor.character <- function(x, value){

x <- forcats::fct_expand(x, value)

x[is.na(x)] <- factor(value)

x
}

#' @export
#' @rdname impute_factor
impute_factor.shade <- function(x, value){

#do nothing
x

}
31 changes: 31 additions & 0 deletions R/impute-fixed.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#' Impute a fixed value into a vector with missing values
#'
#' This can be useful if you are imputing specific values, however we would
#' generally recommend to impute using other model based approaches. See
#' [impute_lm()] and friends.
#'
#' @param x vector
#' @param value value to impute
#'
#' @return vector with a fixed values replaced
#' @export
#' @name impute_fixed
#'
#' @examples
#'
#' vec <- rnorm(10)
#'
#' vec[sample(1:10, 3)] <- NA
#'
#' impute_fixed(vec, -999)
#'
impute_fixed <- function(x, value) UseMethod("impute_fixed")

#' @export
#' @rdname impute_fixed
impute_fixed.default <- function(x, value){

x[is.na(x)] <- value

x
}
24 changes: 24 additions & 0 deletions R/impute-zero.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#' Impute zero into a vector with missing values
#'
#' This can be useful if you are imputing specific values, however we would
#' generally recommend to impute using other model based approaches. See
#' [impute_lm()] and friends.
#'
#' @param x vector
#'
#' @return vector with a fixed values replaced
#' @export
#'
#' @examples
#'
#' vec <- rnorm(10)
#'
#' vec[sample(1:10, 3)] <- NA
#'
#' impute_zero(vec)
#' @rdname impute_zero
impute_zero <- function(x){

impute_fixed(x = x, value = 0)

}
42 changes: 42 additions & 0 deletions man/impute_factor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions man/impute_fixed.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/impute_zero.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions tests/testthat/test-impute-factor.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
library(dplyr)
vec <- factor(x = c(NA, LETTERS[1:4]))

vec2 <- factor(x = c("wat", LETTERS[1:4]))

chick <- chickwts %>%
mutate(
feed = set_prop_miss(feed, prop = 0.2)
)
chick_shadow <- nabular(chick)

test_that("impute_factor works", {
expect_equal(impute_factor(vec, "wat"), vec2)
})

## impute_factor_across --------------------------------------------------------
test_that("impute_factor works with across", {
expect_false(
mutate(chick,
across(where(is.factor), \(x) impute_factor(x, "wat"))) %>%
all_na()
)
})

test_that("impute_factor works with across and nabular", {
expect_false(
mutate(chick_shadow,
across(where(is.factor), \(x) impute_factor(x, "wat"))) %>%
all_na()
)
})

test_that("impute_factor retains proper shadow values when used with across", {
expect_equal(
unbind_data(
mutate(chick_shadow,
across(where(is.factor), \(x) impute_factor(x, "wat")))
),
unbind_data(
chick_shadow
)
)
})
45 changes: 45 additions & 0 deletions tests/testthat/test-impute-fixed.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
vec <- rnorm(10)

vec[sample(1:10, 3)] <- NA

fixed_val <- -99

vec2 <- vec

vec2[is.na(vec)] <- fixed_val

aq_shadow <- nabular(airquality)

test_that("impute_fixed works", {
expect_equal(impute_fixed(vec, -99), vec2)
})

## impute_fixed_across --------------------------------------------------------
library(dplyr)
test_that("impute_fixed works with across", {
expect_false(
mutate(airquality,
across(where(is.numeric), \(x) impute_fixed(x, -99))) %>%
all_na()
)
})

test_that("impute_fixed works with across and nabular", {
expect_false(
mutate(aq_shadow,
across(where(is.numeric), \(x) impute_fixed(x, -99))) %>%
all_na()
)
})

test_that("impute_fixed retains proper shadow values when used with across", {
expect_equal(
unbind_data(
mutate(aq_shadow,
across(where(is.numeric), \(x) impute_fixed(x, -99)))
),
unbind_data(
aq_shadow
)
)
})
Loading

0 comments on commit dc27246

Please sign in to comment.