Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Merge branch 'f-#2-to-utf8'. Fixes #2. Fixes #5.
Browse files Browse the repository at this point in the history
- New `to_utf8()` performs deep conversion to UTF-8, including names and other attributes (#2, #5).
  • Loading branch information
krlmlr committed Aug 9, 2016
2 parents b1dd68e + b2b8cd6 commit 408e401
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 1 deletion.
50 changes: 50 additions & 0 deletions R/to-utf8.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' Deep conversion to UTF-8
#'
#' Converts all characters directly or indirectly contained in an object to
#' UTF-8. This is an S3 generic; the work is done by the method selected for
#' the class of `x`.
#'
#' @param x The object to convert.
#' @param ... Further arguments passed on to the selected method.
#' @return Whatever the selected method returns for `x`.
to_utf8 <- function(x, ...) {
  UseMethod("to_utf8", x)
}

# Method for objects that already carry the "utf8" class: the values are
# returned as they are, only the attributes are run through attrib_to_utf8().
to_utf8.utf8 <- function(x, ...) attrib_to_utf8(x)

# Method for lists: converts every element via to_utf8(), then the element
# names, and finally the remaining attributes via attrib_to_utf8().
to_utf8.list <- function(x, ...) {
  converted_elements <- lapply(x, to_utf8)
  # Assign through `[]` so that the container (and its attributes) is kept.
  x[] <- converted_elements

  converted_names <- to_utf8(names(x))
  names(x) <- converted_names

  attrib_to_utf8(x)
}

# Data frames are converted by the very same function as lists.
to_utf8.data.frame <- to_utf8.list

# Method for character vectors.
#
# With `use_class = TRUE` (the default) the vector is converted with
# as.utf8(); with `use_class = FALSE` it is only re-encoded with enc2utf8().
# In both cases the attributes are then passed through attrib_to_utf8().
to_utf8.character <- function(x, ..., use_class = TRUE) {
  converted <- if (use_class) as.utf8(x) else enc2utf8(x)
  attrib_to_utf8(converted)
}

# Fallback method for every class without a dedicated to_utf8() method.
#
# Character vectors that carry a class attribute other than "utf8" (for
# example columns wrapped in I(), which have class "AsIs") are dispatched
# here rather than to to_utf8.character(). Their strings are re-encoded with
# enc2utf8(), which leaves the class and all other attributes in place.
# Objects of any other type are returned with unchanged values. In all cases
# the attributes are passed through attrib_to_utf8().
to_utf8.default <- function(x, ...) {
  if (is.character(x)) {
    x <- enc2utf8(x)
  }
  attrib_to_utf8(x)
}

# Method for NULL: there is nothing to convert, so NULL is returned.
to_utf8.NULL <- function(x, ...) NULL

# Replaces the attributes of `x` by the result of running them through
# named_to_utf8_except_class(), and returns the modified object.
attrib_to_utf8 <- function(x) {
  converted_attrib <- named_to_utf8_except_class(attributes(x))
  mostattributes(x) <- converted_attrib
  x
}

# Converts the values of an attribute list (as returned by attributes()) with
# to_utf8(), keeping the attribute names in place.
#
# The "class" attribute is converted with `use_class = FALSE`, all other
# attributes with the default settings of to_utf8().
#
# `attrib` is a named list of attributes, or NULL for an object without
# attributes. The return value has the same length and names as `attrib`.
named_to_utf8_except_class <- function(attrib) {
  # Objects without attributes: nothing to convert.
  if (length(attrib) == 0L) {
    return(attrib)
  }

  is_class <- which(names(attrib) == "class")
  if (length(is_class) > 0) {
    attrib[-is_class] <- to_utf8(unname(attrib)[-is_class])
    attrib[[is_class]] <- to_utf8(unname(attrib)[[is_class]], use_class = FALSE)
  } else {
    # Assign into the existing slots instead of replacing the whole list:
    # the converted list is unnamed, and an unnamed list cannot be used to
    # set attributes.
    attrib[] <- to_utf8(unname(attrib))
  }

  attrib
}
13 changes: 13 additions & 0 deletions man/to_utf8.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion src/encoding.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <sys/time.h>
#include <sys/resource.h>

#define USE_RINTERNALS
#include <R.h>
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-to-utf8.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
context("to-utf8")

test_that("character vectors", {
  converted <- to_utf8(letters)

  # The converted vector carries the "utf8" class ...
  expect_is(converted, "utf8")
  # ... while its class attribute itself does not.
  expect_false(inherits(class(converted), "utf8"))
})

test_that("iris", {
  iris_utf8 <- to_utf8(iris)

  attrib_names <- names(attributes(iris_utf8))
  species_levels <- levels(iris_utf8$Species)

  # Column names, attribute names and factor levels are all checked.
  expect_is(colnames(iris_utf8), "utf8")
  expect_true(all_utf8(attrib_names))
  expect_is(species_levels, "utf8")
})

test_that("mtcars", {
  mtcars_utf8 <- to_utf8(mtcars)

  attrib_names <- names(attributes(mtcars_utf8))
  row_labels <- rownames(mtcars_utf8)

  # Column names, attribute names and row names are all checked.
  expect_is(colnames(mtcars_utf8), "utf8")
  expect_true(all_utf8(attrib_names))
  expect_true(all_utf8(row_labels))
})

test_that("umlauts", {
  # Non-ASCII column name and values, passed through enc2native().
  umlaut_name <- enc2native("\u00e4")
  umlaut_values <- enc2native(c("\u00f6", "\u00fc"))

  data <- data.frame(a = I(c("o", "u")))
  colnames(data) <- umlaut_name
  data[[1]] <- umlaut_values

  data_utf8 <- to_utf8(data)

  attrib_names <- names(attributes(data_utf8))
  row_labels <- rownames(data_utf8)

  expect_is(colnames(data_utf8), "utf8")
  expect_true(all_utf8(attrib_names))
  expect_true(all_utf8(row_labels))
})

0 comments on commit 408e401

Please sign in to comment.