diff --git a/DESCRIPTION b/DESCRIPTION index ea08d573..6e68ef76 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: skimr Title: Compact and Flexible Summaries of Data -Version: 1.0.4 +Version: 1.0.5 Authors@R: c( person("Amelia", "McNamara", email="amcnamara@smith.edu", role = "aut"), person("Eduardo", "Arino de la Rubia", email="earino@gmail.com", role = "aut"), @@ -17,9 +17,10 @@ Authors@R: c( person("Scott", "Brenstuhl", email="brenstsr@miamioh.edu", role = 'ctb'), person("Patrick", "Schratz", email="patrick.schratz@gmail.com", role = 'ctb'), person("lbusett", "", email="lbusett@gmail.com", role='ctb'), - person("Mikko","Korpela", email="mvkorpel@iki.fi", role='ctb'), - person("Jennifer","Thompson", email="thompson.jennifer@gmail.com", role='ctb'), - person("Harris","McGehee", email="mcgehee.harris@gmail.com", role='ctb') + person("Mikko", "Korpela", email="mvkorpel@iki.fi", role='ctb'), + person("Jennifer", "Thompson", email="thompson.jennifer@gmail.com", role='ctb'), + person("Harris", "McGehee", email="mcgehee.harris@gmail.com", role='ctb'), + person("Patrick", "Kennedy", email="pkqstr@protonmail.com", role='ctb') ) Description: A simple to use summary function that can be used with pipes and displays nicely in the console. 
The default summary statistics may be @@ -57,7 +58,7 @@ URL: https://github.com/ropenscilabs/skimr BugReports: https://github.com/ropenscilabs/skimr/issues VignetteBuilder: knitr Roxygen: list(markdown = TRUE) -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.1 Collate: 'dplyr.r' 'skimr-package.R' diff --git a/NAMESPACE b/NAMESPACE index 57644eef..306b1233 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,22 +1,26 @@ # Generated by roxygen2: do not edit by hand +S3method(arrange,skim_df) +S3method(filter,skim_df) S3method(kable,data.frame) S3method(kable,skim_df) S3method(kable,summary_skim_df) +S3method(mutate,skim_df) S3method(print,skim_df) S3method(print,skim_vector) S3method(print,spark) S3method(print,summary_skim_df) +S3method(select,skim_df) S3method(skim,data.frame) S3method(skim,default) S3method(skim,grouped_df) +S3method(slice,skim_df) S3method(summary,skim_df) export("%>%") -export(arrange.skim_df) export(contains) export(ends_with) export(everything) -export(filter.skim_df) +export(filter) export(fix_windows_histograms) export(get_skimmers) export(inline_hist) @@ -30,7 +34,6 @@ export(list_min_length) export(matches) export(max_char) export(min_char) -export(mutate.skim_df) export(n_complete) export(n_empty) export(n_missing) @@ -40,7 +43,6 @@ export(one_of) export(pander) export(pander.skim_df) export(pander.summary_skim_df) -export(select.skim_df) export(show_formats) export(show_skimmers) export(skim) @@ -51,11 +53,15 @@ export(skim_to_list) export(skim_to_wide) export(skim_with) export(skim_with_defaults) -export(slice.skim_df) export(sorted_count) export(starts_with) export(ts_end) export(ts_start) +importFrom(dplyr,arrange) +importFrom(dplyr,filter) +importFrom(dplyr,mutate) +importFrom(dplyr,select) +importFrom(dplyr,slice) importFrom(magrittr,"%>%") importFrom(tidyselect,contains) importFrom(tidyselect,ends_with) diff --git a/NEWS.md b/NEWS.md index d65802be..8b0f6e85 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,21 @@ -skimr 1.0.4 (2018-01-12) + +skimr 1.0.5 
(2019-01-05) ======================== This is likely to be the last release of skimr version 1. Version 2 has major changes to the API. Users should review and prepare for those changes now. +### BUG FIXES + * Fix issue where multibyte characters were causing an error. + * Fix problem in which purrr cannot find mean.default. + +skimr 1.0.4 (2019-01-12) +======================== + +This is likely to be the last release of skimr version 1. Version 2 +has major changes to the API. Users should review and prepare for +those changes now. + ### BUG FIXES * Fix failures in handling dplyr verbs related to upcoming release of dplyr 0.8.0. diff --git a/R/dplyr.r b/R/dplyr.r index 892b227a..132ddafb 100644 --- a/R/dplyr.r +++ b/R/dplyr.r @@ -1,7 +1,10 @@ #' Use dplyr verb filter on skim_df objects. #' #' @seealso [`dplyr::filter()`] -#' @inheritParams dplyr::filter +#' @param .data A skim object +#' @param ... Logical predicates defined in terms of the variables in .data. +#' @param .preserve When FALSE (the default) the grouping structure is recalculated +#' based on the resulting data. #' @return skim_df object coerced to a data frame. #' @export filter.skim_df <-function (.data, ..., .preserve = FALSE) { @@ -14,7 +17,8 @@ filter.skim_df <-function (.data, ..., .preserve = FALSE) { #' Use dplyr verb select on skim_df objects. #' #' @seealso [`dplyr::select()`] -#' @inheritParams dplyr::select +#' @param .data A skim object +#' @param ... One or more unquoted expressions separated by commas. #' @return skim_df object coerced to a data frame. #' @export select.skim_df <-function (.data, ...) { @@ -27,7 +31,10 @@ select.skim_df <-function (.data, ...) { #' Use dplyr verb mutate on skim_df objects. #' #' @seealso [`dplyr::mutate()`] -#' @inheritParams dplyr::mutate +#' @param .data A skim object +#' @param ... Name-value pairs of expressions. The name of each argument will +#' be the name of a new variable, and the value will be its +#' corresponding value.
#' @return skim_df object coerced to a data frame. #' @export mutate.skim_df <-function (.data, ...) { @@ -40,10 +47,14 @@ mutate.skim_df <-function (.data, ...) { #' Use dplyr verb arrange on skim_df objects. #' #' @seealso [`dplyr::arrange()`] -#' @inheritParams dplyr::arrange +#' @param .data A skim object +#' @param ... Comma separated list of unquoted variable names, or expressions +#' involving variable names. +#' @param .by_group If TRUE, will sort first by grouping variable. +#' Applies to grouped data frames only. #' @return skim_df object coerced to a data frame. #' @export -arrange.skim_df <-function (.data, ...) { +arrange.skim_df <-function (.data, ..., .by_group = FALSE) { .data <- as.data.frame(.data) .data <- dplyr::arrange(.data, ...) class(.data) <- c("tbl_df", "tbl", "data.frame") @@ -53,7 +64,12 @@ arrange.skim_df <-function (.data, ...) { #' Use dplyr verb slice on skim_df objects. #' #' @seealso [`dplyr::slice()`] -#' @inheritParams dplyr::filter +#' @param .data A skim object +#' @param ... Integer row values. Provide either positive values to keep or +#' negative values to drop. +#' @param .preserve when FALSE (the default), the grouping structure is +#' recalculated based on the resulting data, otherwise +#' it is kept as is. #' @return skim_df object coerced to a data frame.
#' @export slice.skim_df <-function (.data, ..., .preserve = FALSE) { diff --git a/R/functions.R b/R/functions.R index 030170fc..1d573da4 100644 --- a/R/functions.R +++ b/R/functions.R @@ -154,7 +154,7 @@ numeric_funs <- list( missing = n_missing, complete = n_complete, n = length, - mean = purrr::partial(mean, na.rm = TRUE), + mean = purrr::partial(mean.default, na.rm = TRUE), sd = purrr::partial(sd, na.rm = TRUE), p0 = purrr::partial(quantile, probs = 0, na.rm = TRUE, names = FALSE), p25 = purrr::partial(quantile, probs = .25, na.rm = TRUE, names = FALSE), @@ -187,7 +187,7 @@ logical_funs <- list( missing = n_missing, complete = n_complete, n = length, - mean = purrr::partial(mean, na.rm = TRUE), + mean = purrr::partial(mean.default, na.rm = TRUE), count = sorted_count ) @@ -217,7 +217,7 @@ ts_funs <- list( end = ts_end, frequency = stats::frequency, deltat = stats::deltat, - mean = purrr::partial(mean, na.rm = TRUE), + mean = purrr::partial(mean.default, na.rm = TRUE), sd = purrr::partial(sd, na.rm = TRUE), min = purrr::partial(min, na.rm = TRUE), max = purrr::partial(max, na.rm = TRUE), diff --git a/R/skimr-package.R b/R/skimr-package.R index ec74f57f..405bf8ef 100644 --- a/R/skimr-package.R +++ b/R/skimr-package.R @@ -55,3 +55,19 @@ tidyselect::one_of #' @importFrom tidyselect starts_with #' @export tidyselect::starts_with + +#' @importFrom dplyr select +dplyr::select + +#' @importFrom dplyr arrange +dplyr::arrange + +#' @importFrom dplyr filter +#' @export +dplyr::filter + +#' @importFrom dplyr mutate +dplyr::mutate + +#' @importFrom dplyr slice +dplyr::slice diff --git a/R/stats.R b/R/stats.R index 57f1517e..ef3f506e 100644 --- a/R/stats.R +++ b/R/stats.R @@ -135,7 +135,7 @@ n_empty <- function(x) { #' @export min_char <- function(x) { - characters <- nchar(x) + characters <- nchar(x, allowNA = TRUE) min(characters, na.rm = TRUE) } @@ -145,7 +145,7 @@ min_char <- function(x) { #' @export max_char <- function(x) { - characters <- nchar(x) + characters <- 
nchar(x, allowNA = TRUE) max(characters, na.rm = TRUE) } diff --git a/README.Rmd b/README.Rmd index 78fbc67f..7cdf7b60 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,7 +18,7 @@ options(width = 120) [![codecov](https://codecov.io/gh/ropensci/skimr/branch/master/graph/badge.svg)](https://codecov.io/gh/ropenscilabs/skimr) [![](https://badges.ropensci.org/175_status.svg)](https://github.com/ropensci/onboarding/issues/175) -`skimr` provides a frictionless approach to summary statistics which conforms +skimr provides a frictionless approach to summary statistics which conforms to the [principle of least surprise](https://en.wikipedia.org/wiki/Principle_of_least_astonishment), displaying summary statistics the user can skim quickly to understand their data. It handles different data types and returns a `skim_df` object which can @@ -26,7 +26,7 @@ be included in a pipeline or displayed nicely for the human reader. ## Installation -The current released version of `skimr` can be installed from CRAN. If you wish +The current released version of skimr can be installed from CRAN. If you wish to install the current build of the next release you can do so using the following: @@ -47,7 +47,7 @@ Do not rely on APIs from the develop branch. ## Skim statistics in the console -`skimr`: +skimr: - Provides a larger set of statistics than `summary()`, including missing, complete, n, and sd. @@ -147,7 +147,7 @@ skim(mtcars) %>% dplyr::filter(stat=="hist") ## Customizing skimr -Although skimr provides opinionated defaults, it is highly customizable. +Although skimr provides opinionated defaults, it is highly customizable. Users can specify their own statistics, change the formatting of results, create statistics for new classes and develop skimmers for data structures that are not data frames. 
@@ -174,9 +174,9 @@ skim_with_defaults() ### Change formatting -Skimr provides a set of default formats that allow decimals in columns to +skimr provides a set of default formats that allow decimals in columns to be aligned, a reasonable number of decimal places for numeric data, and a -representation of dates. Users can view thes with `show_formats()` and +representation of dates. Users can view these with `show_formats()` and modify them with `skim_format()`. ### Skimming other objects @@ -198,11 +198,11 @@ the low-level code](http://r.789695.n4.nabble.com/Unicode-display-problem-with-d While some cases have been addressed, there are, for example, reports of this issue in Emacs ESS. -This means that while `skimr` can render the histograms to the console and in +This means that while skimr can render the histograms to the console and in `kable()`, it cannot in other circumstances. This includes: -* rendering a `skimr` data frame within `pander()` -* converting a `skimr` data frame to a vanilla R data frame, but tibbles render +* rendering a skimr data frame within `pander()` +* converting a skimr data frame to a vanilla R data frame, but tibbles render correctly One workaround for showing these characters in Windows is to set the CTYPE part @@ -229,7 +229,7 @@ Microsoft Word and Libre Office Write. We welcome issue reports and pull requests, including potentially adding support for commonly used variable classes. However, in general, we encourage -users to take advantage of skimr's flexibility to add their own customized +users to take advantage of skimr's flexibility to add their own customized classes. Please see the [contributing](CONTRIBUTING.md) and [conduct](CONDUCT.md) documents. 
diff --git a/README.md b/README.md index 11ce4eba..7f22c7dd 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ Status](https://travis-ci.org/ropensci/skimr.svg?branch=master)](https://travis- [![codecov](https://codecov.io/gh/ropensci/skimr/branch/master/graph/badge.svg)](https://codecov.io/gh/ropenscilabs/skimr) [![](https://badges.ropensci.org/175_status.svg)](https://github.com/ropensci/onboarding/issues/175) -`skimr` provides a frictionless approach to summary statistics which -conforms to the [principle of least +skimr provides a frictionless approach to summary +statistics which conforms to the [principle of least surprise](https://en.wikipedia.org/wiki/Principle_of_least_astonishment), displaying summary statistics the user can skim quickly to understand their data. It handles different data types and returns a `skim_df` @@ -18,9 +18,9 @@ human reader. Installation ------------ -The current released version of `skimr` can be installed from CRAN. If -you wish to install the current build of the next release you can do so -using the following: +The current released version of skimr can be installed from +CRAN. If you wish to install the current build of the next release you +can do so using the following: # install.packages("devtools") devtools::install_github("ropenscilabs/skimr") @@ -38,7 +38,7 @@ Do not rely on APIs from the develop branch. Skim statistics in the console ------------------------------ -`skimr`: +skimr: - Provides a larger set of statistics than `summary()`, including missing, complete, n, and sd. @@ -532,10 +532,10 @@ produces a long, tidy-format `skim_df` object that can be computed on. Customizing skimr ----------------- -Although skimr provides opinionated defaults, it is highly customizable. -Users can specify their own statistics, change the formatting of -results, create statistics for new classes and develop skimmers for data -structures that are not data frames. 
+Although skimr provides opinionated defaults, it is highly +customizable. Users can specify their own statistics, change the +formatting of results, create statistics for new classes and develop +skimmers for data structures that are not data frames. ### Specify your own statistics and classes @@ -564,10 +564,10 @@ data. ### Change formatting -Skimr provides a set of default formats that allow decimals in columns -to be aligned, a reasonable number of decimal places for numeric data, -and a representation of dates. Users can view thes with `show_formats()` -and modify them with `skim_format()`. +skimr provides a set of default formats that allow decimals +in columns to be aligned, a reasonable number of decimal places for +numeric data, and a representation of dates. Users can view these with +`show_formats()` and modify them with `skim_format()`. ### Skimming other objects @@ -590,12 +590,13 @@ code](http://r.789695.n4.nabble.com/Unicode-display-problem-with-data-frames-und for printing dataframes. While some cases have been addressed, there are, for example, reports of this issue in Emacs ESS. -This means that while `skimr` can render the histograms to the console -and in `kable()`, it cannot in other circumstances. This includes: +This means that while skimr can render the histograms to +the console and in `kable()`, it cannot in other circumstances. This +includes: -- rendering a `skimr` data frame within `pander()` -- converting a `skimr` data frame to a vanilla R data frame, but - tibbles render correctly +- rendering a skimr data frame within `pander()` +- converting a skimr data frame to a vanilla R data + frame, but tibbles render correctly One workaround for showing these characters in Windows is to set the CTYPE part of your locale to Chinese/Japanese/Korean with @@ -624,8 +625,8 @@ Contributing We welcome issue reports and pull requests, including potentially adding support for commonly used variable classes. 
However, in general, we -encourage users to take advantage of skimr's flexibility to add their -own customized classes. Please see the [contributing](CONTRIBUTING.md) -and [conduct](CONDUCT.md) documents. +encourage users to take advantage of skimr's flexibility to +add their own customized classes. Please see the +[contributing](CONTRIBUTING.md) and [conduct](CONDUCT.md) documents. [![ropenci\_footer](https://ropensci.org/public_images/ropensci_footer.png)](https://ropensci.org) diff --git a/codemeta.json b/codemeta.json index 67e59a03..09cfae15 100644 --- a/codemeta.json +++ b/codemeta.json @@ -10,7 +10,7 @@ "codeRepository": "https://github.com/ropenscilabs/skimr", "issueTracker": "https://github.com/ropenscilabs/skimr/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "1.0.4", + "version": "1.0.5", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", @@ -121,6 +121,12 @@ "givenName": "Harris", "familyName": "McGehee", "email": "mcgehee.harris@gmail.com" + }, + { + "@type": "Person", + "givenName": "Patrick", + "familyName": "Kennedy", + "email": "pkqstr@protonmail.com" } ], "copyrightHolder": [ @@ -353,7 +359,7 @@ ], "releaseNotes": "https://github.com/elinw/skimr/blob/master/NEWS.md", "readme": "https://github.com/elinw/skimr/blob/master/README.md", - "fileSize": "1656.814KB", + "fileSize": "1981.177KB", "contIntegration": [ "https://travis-ci.org/ropenscilabs/skimr", "https://codecov.io/gh/ropenscilabs/skimr" @@ -363,5 +369,13 @@ "url": "https://github.com/ropensci/onboarding/issues/175", "provider": "http://ropensci.org" }, - "keywords": "unconf17" + "keywords": [ + "unconf17", + "r", + "summary-statistics", + "ropensci", + "unconf", + "r-package", + "rstats" + ] } diff --git a/man/arrange.skim_df.Rd b/man/arrange.skim_df.Rd index 27c71250..13c77acf 100644 --- a/man/arrange.skim_df.Rd +++ b/man/arrange.skim_df.Rd @@ -4,14 +4,16 @@ \alias{arrange.skim_df} \title{Use dplyr verb arrange on skim_df objects.} \usage{ 
-arrange.skim_df(.data, ...) +\method{arrange}{skim_df}(.data, ..., .by_group = FALSE) } \arguments{ -\item{.data}{A tbl. All main verbs are S3 generics and provide methods -for \code{\link[=tbl_df]{tbl_df()}}, \code{\link[dtplyr:tbl_dt]{dtplyr::tbl_dt()}} and \code{\link[dbplyr:tbl_dbi]{dbplyr::tbl_dbi()}}.} +\item{.data}{A skim object} \item{...}{Comma separated list of unquoted variable names, or expressions -involving variable names. Use \code{\link[=desc]{desc()}} to sort a variable in descending order.} +involving variable names.} + +\item{.by_group}{If TRUE, will sort first by grouping variable. +Applies to grouped data frames only.} } \value{ skim_df object coerced to a data frame. diff --git a/man/filter.skim_df.Rd b/man/filter.skim_df.Rd index f70a4b3d..7b8a522c 100644 --- a/man/filter.skim_df.Rd +++ b/man/filter.skim_df.Rd @@ -4,24 +4,15 @@ \alias{filter.skim_df} \title{Use dplyr verb filter on skim_df objects.} \usage{ -filter.skim_df(.data, ..., .preserve = FALSE) +\method{filter}{skim_df}(.data, ..., .preserve = FALSE) } \arguments{ -\item{.data}{A tbl. All main verbs are S3 generics and provide methods -for \code{\link[=tbl_df]{tbl_df()}}, \code{\link[dtplyr:tbl_dt]{dtplyr::tbl_dt()}} and \code{\link[dbplyr:tbl_dbi]{dbplyr::tbl_dbi()}}.} +\item{.data}{A skim object} -\item{...}{Logical predicates defined in terms of the variables in \code{.data}. -Multiple conditions are combined with \code{&}. Only rows where the -condition evaluates to \code{TRUE} are kept. +\item{...}{Logical predicates defined in terms of the variables in .data.} -The arguments in \code{...} are automatically \link[rlang:quo]{quoted} and -\link[rlang:eval_tidy]{evaluated} in the context of the data -frame. They support \link[rlang:quasiquotation]{unquoting} and -splicing. 
See \code{vignette("programming")} for an introduction to -these concepts.} - -\item{.preserve}{when \code{FALSE} (the default), the grouping structure -is recalculated based on the resulting data, otherwise it is kept as is.} +\item{.preserve}{When FALSE (the default) the grouping structure is recalculated +based on the resulting data.} } \value{ skim_df object coerced to a data frame. diff --git a/man/mutate.skim_df.Rd b/man/mutate.skim_df.Rd index f7f21641..b4e22f8c 100644 --- a/man/mutate.skim_df.Rd +++ b/man/mutate.skim_df.Rd @@ -4,24 +4,14 @@ \alias{mutate.skim_df} \title{Use dplyr verb mutate on skim_df objects.} \usage{ -mutate.skim_df(.data, ...) +\method{mutate}{skim_df}(.data, ...) } \arguments{ -\item{.data}{A tbl. All main verbs are S3 generics and provide methods -for \code{\link[=tbl_df]{tbl_df()}}, \code{\link[dtplyr:tbl_dt]{dtplyr::tbl_dt()}} and \code{\link[dbplyr:tbl_dbi]{dbplyr::tbl_dbi()}}.} +\item{.data}{A skim object} -\item{...}{Name-value pairs of expressions, each with length 1 or the same -length as the number of rows in the group (if using \code{\link[=group_by]{group_by()}}) or in the entire -input (if not using groups). The name of each argument will be the name of -a new variable, and the value will be its corresponding value. Use a \code{NULL} -value in \code{mutate} to drop a variable. New variables overwrite existing variables -of the same name. - -The arguments in \code{...} are automatically \link[rlang:quo]{quoted} and -\link[rlang:eval_tidy]{evaluated} in the context of the data -frame. They support \link[rlang:quasiquotation]{unquoting} and -splicing. See \code{vignette("programming")} for an introduction to -these concepts.} +\item{...}{Name-value pairs of expressions. The name of each argument will +be the name of a new variable, and the value will be its +corresponding value.} } \value{ skim_df object coerced to a data frame.
diff --git a/man/reexports.Rd b/man/reexports.Rd index a6ad9587..66cdb9a4 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -4,21 +4,19 @@ \name{reexports} \alias{reexports} \alias{\%>\%} -\alias{reexports} \alias{contains} \alias{select_helpers} -\alias{reexports} \alias{ends_with} -\alias{reexports} \alias{everything} -\alias{reexports} \alias{matches} -\alias{reexports} \alias{num_range} -\alias{reexports} \alias{one_of} -\alias{reexports} \alias{starts_with} +\alias{select} +\alias{arrange} +\alias{filter} +\alias{mutate} +\alias{slice} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -26,6 +24,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ + \item{dplyr}{\code{\link[dplyr]{select}}, \code{\link[dplyr]{arrange}}, \code{\link[dplyr]{filter}}, \code{\link[dplyr]{mutate}}, \code{\link[dplyr]{slice}}} + \item{magrittr}{\code{\link[magrittr]{\%>\%}}} \item{tidyselect}{\code{\link[tidyselect]{contains}}, \code{\link[tidyselect]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect]{matches}}, \code{\link[tidyselect]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}} diff --git a/man/select.skim_df.Rd b/man/select.skim_df.Rd index 6a5e2477..ed134876 100644 --- a/man/select.skim_df.Rd +++ b/man/select.skim_df.Rd @@ -4,30 +4,12 @@ \alias{select.skim_df} \title{Use dplyr verb select on skim_df objects.} \usage{ -select.skim_df(.data, ...) +\method{select}{skim_df}(.data, ...) } \arguments{ -\item{.data}{A tbl. All main verbs are S3 generics and provide methods -for \code{\link[=tbl_df]{tbl_df()}}, \code{\link[dtplyr:tbl_dt]{dtplyr::tbl_dt()}} and \code{\link[dbplyr:tbl_dbi]{dbplyr::tbl_dbi()}}.} +\item{.data}{A skim object} -\item{...}{One or more unquoted expressions separated by commas. 
-You can treat variable names like they are positions, so you can -use expressions like \code{x:y} to select ranges of variables. - -Positive values select variables; negative values drop variables. -If the first expression is negative, \code{select()} will automatically -start with all variables. - -Use named arguments, e.g. \code{new_name = old_name}, to rename selected variables. - -The arguments in \code{...} are automatically \link[rlang:quo]{quoted} and -\link[rlang:eval_tidy]{evaluated} in a context where column names -represent column positions. They also support -\link[rlang:quasiquotation]{unquoting} and splicing. See -\code{vignette("programming")} for an introduction to these concepts. - -See \link[tidyselect:select_helpers]{select helpers} for more details and -examples about tidyselect helpers such as \code{starts_with()}, \code{everything()}, ...} +\item{...}{One or more unquoted expressions separated by commas.} } \value{ skim_df object coerced to a data frame. diff --git a/man/slice.skim_df.Rd b/man/slice.skim_df.Rd index 48bf18e2..585412f1 100644 --- a/man/slice.skim_df.Rd +++ b/man/slice.skim_df.Rd @@ -4,24 +4,17 @@ \alias{slice.skim_df} \title{Use dplyr verb slice on skim_df objects.} \usage{ -slice.skim_df(.data, ..., .preserve = FALSE) +\method{slice}{skim_df}(.data, ..., .preserve = FALSE) } \arguments{ -\item{.data}{A tbl. All main verbs are S3 generics and provide methods -for \code{\link[=tbl_df]{tbl_df()}}, \code{\link[dtplyr:tbl_dt]{dtplyr::tbl_dt()}} and \code{\link[dbplyr:tbl_dbi]{dbplyr::tbl_dbi()}}.} +\item{.data}{A skim object} -\item{...}{Logical predicates defined in terms of the variables in \code{.data}. -Multiple conditions are combined with \code{&}. Only rows where the -condition evaluates to \code{TRUE} are kept. +\item{...}{Integer row values.
Provide either positive values to keep or +negative values to drop.} -The arguments in \code{...} are automatically \link[rlang:quo]{quoted} and -\link[rlang:eval_tidy]{evaluated} in the context of the data -frame. They support \link[rlang:quasiquotation]{unquoting} and -splicing. See \code{vignette("programming")} for an introduction to -these concepts.} - -\item{.preserve}{when \code{FALSE} (the default), the grouping structure -is recalculated based on the resulting data, otherwise it is kept as is.} +\item{.preserve}{when FALSE (the default), the grouping structure is +recalculated based on the resulting data, otherwise +it is kept as is.} } \value{ skim_df object coerced to a data frame. diff --git a/tests/testthat/test-stats.R b/tests/testthat/test-stats.R index efcf8e8c..e107842c 100644 --- a/tests/testthat/test-stats.R +++ b/tests/testthat/test-stats.R @@ -121,6 +121,14 @@ test_that("min_char is calculated correctly, including empty strings.", { expect_identical(input, correct) }) +test_that("min_char with a multibyte character does not throw an error.", { + data <- c("a", "ab", "abc", "Coleophora asteris M\x9fhl.") + #correct <- as.integer(1) + #input <- min_char(data) + #expect_identical(input, correct) + expect_error(min_char(data), NA) +}) + test_that("max_char is calculated correctly, including empty strings.", { data<-c("a", "ab", "abc", "") correct <- as.integer(3) @@ -128,6 +136,14 @@ test_that("max_char is calculated correctly, including empty strings.", { expect_identical(input, correct) }) +test_that("max_char with a multibyte character does not throw an error.", { + data <- c("a", "ab", "abc", "Coleophora asteris M\x9fhl.") + #correct <- as.integer(3) + #input <- max_char(data) + #expect_identical(input, correct) + expect_error(max_char(data), NA) +}) + test_that("n_unique is calculated correctly.", { correct <- 4L data<-c("a", "ab", "abc", "") diff --git a/vignettes/Skimr_defaults.Rmd b/vignettes/Skimr_defaults.Rmd index 08044370..be4d573a
100644 --- a/vignettes/Skimr_defaults.Rmd +++ b/vignettes/Skimr_defaults.Rmd @@ -3,7 +3,7 @@ title: "Skimr defaults" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Vignette Title} + %\VignetteIndexEntry{Skimr defaults} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- diff --git a/vignettes/Using_skimr.Rmd b/vignettes/Using_skimr.Rmd index 332e7db5..6c2f8b26 100644 --- a/vignettes/Using_skimr.Rmd +++ b/vignettes/Using_skimr.Rmd @@ -172,7 +172,7 @@ iris_setosa <- iris %>% ``` -- `skim_to_list()` returns a nqmed list of the wide data frames for each data +- `skim_to_list()` returns a named list of the wide data frames for each data type. These data frames contain the formatted, character values, meaning that they are most useful for display. In general, users will want to store the results in an object for further handling.