From d2a9b7271fc4495d3829e30b5a22d20446f02b1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mine=20=C3=87etinkaya-Rundel?= <cetinkaya.mine@gmail.com>
Date: Tue, 28 May 2024 00:08:24 -0400
Subject: [PATCH] data set -> dataset, closes #75

---
 DESCRIPTION                                        |  6 +++---
 R/buildAxis.R                                      |  6 +++---
 R/data-absenteeism.R                               |  2 +-
 R/data-ami_occurrences.R                           |  2 +-
 R/data-arbuthnot.R                                 |  2 +-
 R/data-association.R                               |  2 +-
 R/data-ball_bearing.R                              |  2 +-
 R/data-births14.R                                  |  6 +++---
 R/data-books.R                                     |  2 +-
 R/data-cars93.R                                    |  2 +-
 R/data-children_gender_stereo.R                    |  6 +++---
 R/data-climate70.R                                 |  2 +-
 R/data-corr_match.R                                |  4 ++--
 R/data-cpr.R                                       |  2 +-
 R/data-credits.R                                   |  2 +-
 R/data-drone_blades.R                              |  2 +-
 R/data-email50.R                                   |  2 +-
 R/data-env_regulation.R                            |  2 +-
 R/data-esi.R                                       |  2 +-
 R/data-family_college.R                            |  2 +-
 R/data-friday.R                                    |  2 +-
 R/data-gradestv.R                                  |  4 ++--
 R/data-housing.R                                   |  2 +-
 R/data-ipod.R                                      |  2 +-
 R/data-jury.R                                      |  4 ++--
 R/data-loans_full_schema.R                         |  4 ++--
 R/data-london_murders.R                            |  2 +-
 R/data-mammals.R                                   |  2 +-
 R/data-mariokart.R                                 |  6 +++---
 R/data-military.R                                  |  6 +++---
 R/data-mlb_teams.R                                 |  2 +-
 R/data-movies.R                                    |  2 +-
 R/data-mtl.R                                       |  2 +-
 R/data-nba_finals.R                                |  2 +-
 R/data-nba_finals_teams.R                          |  2 +-
 R/data-ncbirths.R                                  |  6 +++---
 R/data-nyc.R                                       |  4 +++-
 R/data-outliers.R                                  |  2 +-
 R/data-pew_energy_2018.R                           |  2 +-
 R/data-photo_classify.R                            |  2 +-
 R/data-piracy.R                                    |  4 ++--
 R/data-playing_cards.R                             |  2 +-
 R/data-possum.R                                    |  2 +-
 R/data-race_justice.R                              |  2 +-
 R/data-rosling_responses.R                         |  2 +-
 R/data-russian_influence_on_us_election_2016.R     |  2 +-
 R/data-simulated_dist.R                            | 10 +++++-----
 R/data-simulated_normal.R                          |  2 +-
 R/data-smoking.R                                   |  2 +-
 R/data-soda.R                                      |  2 +-
 R/data-student_sleep.R                             |  2 +-
 R/data-teacher.R                                   |  2 +-
 R/data-tips.R                                      |  6 +++---
 R/data-toohey.R                                    |  2 +-
 R/data-toy_anova.R                                 |  4 ++--
 R/data-ucla_textbooks_f18.R                        |  2 +-
 R/data-ukdemo.R                                    |  2 +-
 R/data-unempl.R                                    |  2 +-
 R/data-yrbss_samp.R                                |  2 +-
 R/write_pkg_data.R                                 | 10 +++++-----
 README.Rmd                                         |  2 +-
 README.md                                          | 14 +++++++-------
 .../simpsons_paradox_covid-dataprep.R              |  5 +++--
 man/absenteeism.Rd                                 |  2 +-
 man/ami_occurrences.Rd                             |  2 +-
 man/arbuthnot.Rd                                   |  2 +-
 man/association.Rd                                 |  2 +-
 man/ball_bearing.Rd                                |  2 +-
 man/births14.Rd                                    |  6 +++---
 man/books.Rd                                       |  2 +-
 man/buildAxis.Rd                                   |  6 +++---
 man/cars93.Rd                                      |  2 +-
 man/children_gender_stereo.Rd                      |  6 +++---
 man/climate70.Rd                                   |  2 +-
 man/corr_match.Rd                                  |  4 ++--
 man/cpr.Rd                                         |  2 +-
 man/credits.Rd                                     |  2 +-
 man/drone_blades.Rd                                |  2 +-
 man/email50.Rd                                     |  2 +-
 man/env_regulation.Rd                              |  2 +-
 man/esi.Rd                                         |  2 +-
 man/family_college.Rd                              |  2 +-
 man/friday.Rd                                      |  2 +-
 man/gradestv.Rd                                    |  4 ++--
 man/housing.Rd                                     |  2 +-
 man/ipod.Rd                                        |  2 +-
 man/jury.Rd                                        |  4 ++--
 man/loans_full_schema.Rd                           |  4 ++--
 man/london_murders.Rd                              |  2 +-
 man/mammals.Rd                                     |  2 +-
 man/mariokart.Rd                                   |  6 +++---
 man/military.Rd                                    |  6 +++---
 man/mlb_teams.Rd                                   |  2 +-
 man/movies.Rd                                      |  2 +-
 man/mtl.Rd                                         |  2 +-
 man/nba_finals.Rd                                  |  2 +-
 man/nba_finals_teams.Rd                            |  2 +-
 man/ncbirths.Rd                                    |  6 +++---
 man/nyc.Rd                                         |  4 +++-
 man/openintro-package.Rd                           |  4 ++--
 man/outliers.Rd                                    |  2 +-
 man/pew_energy_2018.Rd                             |  2 +-
 man/photo_classify.Rd                              |  2 +-
 man/piracy.Rd                                      |  4 ++--
 man/playing_cards.Rd                               |  2 +-
 man/possum.Rd                                      |  2 +-
 man/race_justice.Rd                                |  2 +-
 man/rosling_responses.Rd                           |  2 +-
 man/russian_influence_on_us_election_2016.Rd       |  2 +-
 man/simulated_dist.Rd                              | 10 +++++-----
 man/simulated_normal.Rd                            |  2 +-
 man/smoking.Rd                                     |  2 +-
 man/soda.Rd                                        |  2 +-
 man/student_sleep.Rd                               |  2 +-
 man/teacher.Rd                                     |  2 +-
 man/tips.Rd                                        |  6 +++---
 man/toohey.Rd                                      |  2 +-
 man/toy_anova.Rd                                   |  4 ++--
 man/ucla_textbooks_f18.Rd                          |  2 +-
 man/ukdemo.Rd                                      |  2 +-
 man/unempl.Rd                                      |  2 +-
 man/write_pkg_data.Rd                              |  2 +-
 man/yrbss_samp.Rd                                  |  2 +-
 123 files changed, 190 insertions(+), 185 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 068be50e..aae4f068 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: openintro
-Title: Data Sets and Supplemental Functions from 'OpenIntro' Textbooks and Labs
-Version: 2.4.0
+Title: Datasets and Supplemental Functions from 'OpenIntro' Textbooks and Labs
+Version: 2.5.0
 Authors@R: c(
     person("Mine", "\u00C7etinkaya-Rundel", email = "cetinkaya.mine@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-6452-2420")),
     person("David", "Diez", email = "david.m.diez@gmail.com", role = c("aut")),
@@ -13,7 +13,7 @@ Authors@R: c(
     )
 Description: Supplemental functions and data for 'OpenIntro' resources, which 
     includes open-source textbooks and resources for introductory statistics 
-    (<https://www.openintro.org/>). The package contains data sets used in our 
+    (<https://www.openintro.org/>). The package contains datasets used in our 
     open-source textbooks along with custom plotting functions for reproducing 
     book figures. Note that many functions and examples include color 
     transparency; some plotting elements may not show up properly (or at all) 
diff --git a/R/buildAxis.R b/R/buildAxis.R
index d1c72a6c..38b7b844 100644
--- a/R/buildAxis.R
+++ b/R/buildAxis.R
@@ -4,8 +4,8 @@
 #' of labels on the axis. This function is still under development.
 #'
 #' The primary reason behind building this function was to allow a plot to be
-#' created with similar features but with different data sets. For instance, if
-#' a set of code was written for one data set and the function \code{axis} had
+#' created with similar features but with different datasets. For instance, if
+#' a set of code was written for one dataset and the function \code{axis} had
 #' been utilized with pre-specified values, the axis may not match the plot of
 #' a new set of data. The function \code{buildAxis} addresses this problem by
 #' allowing the number of axis labels to be specified and controlled.
@@ -15,7 +15,7 @@
 #' with the best score.
 #'
 #' @param side The side of the plot where to add the axis.
-#' @param limits Either lower and upper limits on the axis or a data set.
+#' @param limits Either lower and upper limits on the axis or a dataset.
 #' @param n The preferred number of axis labels.
 #' @param nMin The minimum number of axis labels.
 #' @param nMax The maximum number of axis labels.
diff --git a/R/data-absenteeism.R b/R/data-absenteeism.R
index ac436500..737c337a 100644
--- a/R/data-absenteeism.R
+++ b/R/data-absenteeism.R
@@ -20,7 +20,7 @@
 #' @source Venables WN, Ripley BD. 2002. Modern Applied Statistics with S.
 #' Fourth Edition. New York: Springer.
 #'
-#' Data can also be found in the R `MASS` package under the data set name
+#' Data can also be found in the R `MASS` package under the dataset name
 #' `quine`.
 #' @keywords datasets
 #' @examples
diff --git a/R/data-ami_occurrences.R b/R/data-ami_occurrences.R
index 997d7970..a7421b51 100644
--- a/R/data-ami_occurrences.R
+++ b/R/data-ami_occurrences.R
@@ -1,6 +1,6 @@
 #' Acute Myocardial Infarction (Heart Attack) Events
 #'
-#' This data set is simulated but contains realistic occurrences of AMI in NY
+#' This dataset is simulated but contains realistic occurrences of AMI in NY
 #' City.
 #'
 #'
diff --git a/R/data-arbuthnot.R b/R/data-arbuthnot.R
index 97588499..31a83974 100644
--- a/R/data-arbuthnot.R
+++ b/R/data-arbuthnot.R
@@ -15,7 +15,7 @@
 #'   \item{boys}{number of male christenings (births)}
 #'   \item{girls}{number of female christenings (births)}
 #' }
-#' @source These data are excerpted from the `Arbuthnot` data set in the
+#' @source These data are excerpted from the `Arbuthnot` dataset in the
 #' [HistData](https://CRAN.R-project.org/package=HistData) package.
 #' @examples
 #'
diff --git a/R/data-association.R b/R/data-association.R
index a224ee11..b68615f2 100644
--- a/R/data-association.R
+++ b/R/data-association.R
@@ -1,6 +1,6 @@
 #' Simulated data for association plots
 #'
-#' Simulated data set.
+#' Simulated dataset.
 #'
 #'
 #' @name association
diff --git a/R/data-ball_bearing.R b/R/data-ball_bearing.R
index 88b70b48..6e125029 100644
--- a/R/data-ball_bearing.R
+++ b/R/data-ball_bearing.R
@@ -1,6 +1,6 @@
 #' Lifespan of ball bearings
 #'
-#' A simulated data set on lifespan of ball bearings.
+#' A simulated dataset on lifespan of ball bearings.
 #'
 #'
 #' @name ball_bearing
diff --git a/R/data-births14.R b/R/data-births14.R
index 154a1438..4e7b3bcd 100644
--- a/R/data-births14.R
+++ b/R/data-births14.R
@@ -1,10 +1,10 @@
 #' US births
 #'
-#' Every year, the US releases to the public a large data set containing
-#' information on births recorded in the country. This data set has been of
+#' Every year, the US releases to the public a large dataset containing
+#' information on births recorded in the country. This dataset has been of
 #' interest to medical researchers who are studying the relation between habits
 #' and practices of expectant mothers and the birth of their children. This is a
-#' random sample of 1,000 cases from the data set released in 2014.
+#' random sample of 1,000 cases from the dataset released in 2014.
 #'
 #' @source United States Department of Health and Human Services.
 #' Centers for Disease Control and Prevention.
diff --git a/R/data-books.R b/R/data-books.R
index 7e32e0d0..89c481af 100644
--- a/R/data-books.R
+++ b/R/data-books.R
@@ -1,6 +1,6 @@
 #' Sample of books on a shelf
 #'
-#' Simulated data set.
+#' Simulated dataset.
 #'
 #'
 #' @name books
diff --git a/R/data-cars93.R b/R/data-cars93.R
index 0f5b0e7c..60348e85 100644
--- a/R/data-cars93.R
+++ b/R/data-cars93.R
@@ -1,7 +1,7 @@
 #' cars93
 #'
 #' A data frame with 54 rows and 6 columns. This data is a subset of the
-#' \code{Cars93} data set from the \code{MASS} package.
+#' \code{Cars93} dataset from the \code{MASS} package.
 #'
 #' These cars represent a random sample for 1993 models that were in both
 #' \emph{Consumer Reports} and \emph{PACE Buying Guide}. Only vehicles of type
diff --git a/R/data-children_gender_stereo.R b/R/data-children_gender_stereo.R
index 5320a515..5c547380 100644
--- a/R/data-children_gender_stereo.R
+++ b/R/data-children_gender_stereo.R
@@ -2,7 +2,7 @@
 #'
 #' Stereotypes are common, but at what age do they start? This study
 #' investigates stereotypes in young children aged 5-7 years old. There are
-#' four studies reported in the paper, and all four data sets are provided here.
+#' four studies reported in the paper, and all four datasets are provided here.
 #'
 #' The structure of the data object is a little unusual, so we recommend
 #' reviewing the Examples section before starting your analysis.
@@ -22,7 +22,7 @@
 #' that are among the following:
 #' \describe{
 #'   \item{subject}{Subject ID. Note that Subject 1 in the first data frame
-#'   (data set) does \bold{not} correspond to Subject 1 in the second data frame.}
+#'   (dataset) does \bold{not} correspond to Subject 1 in the second data frame.}
 #'   \item{gender}{Gender of the subject.}
 #'   \item{age}{Age of the subject, in years.}
 #'   \item{trait}{The trait that the children were making a judgement about,
@@ -55,7 +55,7 @@
 #' @keywords datasets
 #' @examples
 #'
-#' # This data set is a little funny to work with.
+#' # This dataset is a little funny to work with.
 #' # If wanting to review the data for a study, we
 #' # recommend first assigning the corresponding
 #' # data frame to a new variable. For instance,
diff --git a/R/data-climate70.R b/R/data-climate70.R
index 776ce704..c2e24eae 100644
--- a/R/data-climate70.R
+++ b/R/data-climate70.R
@@ -28,7 +28,7 @@
 #' # Data sampled are from the US, Europe, and Australia.
 #' # This geographic limitation may be due to the particular
 #' # years considered, since locations without both 1948 and
-#' # 2018 were discarded for this (simple) data set.
+#' # 2018 were discarded for this (simple) dataset.
 #' plot(climate70$longitude, climate70$latitude)
 #'
 #' plot(climate70$dx70_1948, climate70$dx70_2018)
diff --git a/R/data-corr_match.R b/R/data-corr_match.R
index f8762a7f..820fc6b3 100644
--- a/R/data-corr_match.R
+++ b/R/data-corr_match.R
@@ -1,4 +1,4 @@
-#' Sample data sets for correlation problems
+#' Sample datasets for correlation problems
 #'
 #' Simulated data.
 #'
@@ -18,7 +18,7 @@
 #'   \item{y7}{a numeric vector}
 #'   \item{y8}{a numeric vector}
 #'   }
-#' @source Simulated data set.
+#' @source Simulated dataset.
 #' @keywords datasets
 #' @examples
 #'
diff --git a/R/data-cpr.R b/R/data-cpr.R
index 10320635..2dddfd0d 100644
--- a/R/data-cpr.R
+++ b/R/data-cpr.R
@@ -1,4 +1,4 @@
-#' CPR data set
+#' CPR dataset
 #'
 #' These patients were randomly divided into a treatment group where they
 #' received a blood thinner or the control group where they did not receive a
diff --git a/R/data-credits.R b/R/data-credits.R
index 71e3a7b6..5ac5df37 100644
--- a/R/data-credits.R
+++ b/R/data-credits.R
@@ -1,6 +1,6 @@
 #' College credits.
 #'
-#' A simulated data set of number of credits taken by college students each
+#' A simulated dataset of number of credits taken by college students each
 #' semester.
 #'
 #'
diff --git a/R/data-drone_blades.R b/R/data-drone_blades.R
index 8a66531c..57702681 100644
--- a/R/data-drone_blades.R
+++ b/R/data-drone_blades.R
@@ -1,6 +1,6 @@
 #' Quadcopter Drone Blades
 #'
-#' Quality control data set for quadcopter drone blades, where this data has
+#' Quality control dataset for quadcopter drone blades, where this data has
 #' been made up for an example.
 #'
 #'
diff --git a/R/data-email50.R b/R/data-email50.R
index a13c0e59..34fe407c 100644
--- a/R/data-email50.R
+++ b/R/data-email50.R
@@ -1,6 +1,6 @@
 #' Sample of 50 emails
 #'
-#' This is a subsample of the \code{\link{email}} data set.
+#' This is a subsample of the \code{\link{email}} dataset.
 #'
 #'
 #' @name email50
diff --git a/R/data-env_regulation.R b/R/data-env_regulation.R
index 86af78e8..79f7f4a8 100644
--- a/R/data-env_regulation.R
+++ b/R/data-env_regulation.R
@@ -12,7 +12,7 @@
 #'
 #' The actual sample size was 1012. However, the original data were not from a
 #' simple random sample; after accounting for the design, the equivalent sample
-#' size was about 705, which was what was used for the data set here to keep
+#' size was about 705, which was what was used for the dataset here to keep
 #' things simpler for intro stat analyses.
 #'
 #' @name env_regulation
diff --git a/R/data-esi.R b/R/data-esi.R
index 7752ae78..d84142b3 100644
--- a/R/data-esi.R
+++ b/R/data-esi.R
@@ -1,6 +1,6 @@
 #' Environmental Sustainability Index 2005
 #'
-#' This data set comes from the 2005 Environmental Sustainability Index:
+#' This dataset comes from the 2005 Environmental Sustainability Index:
 #' Benchmarking National Environmental Stewardship.  Countries are given an
 #' overall sustainability score as well as scores in each of several different
 #' environmental areas.
diff --git a/R/data-family_college.R b/R/data-family_college.R
index 33f21d21..c902f3df 100644
--- a/R/data-family_college.R
+++ b/R/data-family_college.R
@@ -1,6 +1,6 @@
 #' Simulated sample of parent / teen college attendance
 #'
-#' A simulated data set based on real population summaries.
+#' A simulated dataset based on real population summaries.
 #'
 #'
 #' @name family_college
diff --git a/R/data-friday.R b/R/data-friday.R
index 93d2d066..f7feec33 100644
--- a/R/data-friday.R
+++ b/R/data-friday.R
@@ -1,6 +1,6 @@
 #' Friday the 13th
 #'
-#' This data set addresses issues of how superstitions regarding Friday the
+#' This dataset addresses issues of how superstitions regarding Friday the
 #' 13th affect human behavior, and whether Friday the 13th is an unlucky day.
 #' Scanlon, et al. collected data on traffic and shopping patterns and accident
 #' frequency for Fridays the 6th and 13th between October of 1989 and November
diff --git a/R/data-gradestv.R b/R/data-gradestv.R
index 4ddd76de..fdc0b6ed 100644
--- a/R/data-gradestv.R
+++ b/R/data-gradestv.R
@@ -1,10 +1,10 @@
 #' Simulated data for analyzing the relationship between watching TV and grades
 #'
-#' This is a simulated data set to be used to estimate the relationship between
+#' This is a simulated dataset to be used to estimate the relationship between
 #' number of hours per week students watch TV and the grade they got in a
 #' statistics class.
 #'
-#' There are a few potential outliers in this data set. When analyzing the data
+#' There are a few potential outliers in this dataset. When analyzing the data
 #' one should consider how (if at all) these outliers may affect the estimates
 #' of correlation coefficient and regression parameters.
 #'
diff --git a/R/data-housing.R b/R/data-housing.R
index dbf206a5..68ddb8ea 100644
--- a/R/data-housing.R
+++ b/R/data-housing.R
@@ -1,4 +1,4 @@
-#' Simulated data set on student housing
+#' Simulated dataset on student housing
 #'
 #' Each observation represents a simulated rent price for a student.
 #'
diff --git a/R/data-ipod.R b/R/data-ipod.R
index d04a5cca..3071ac95 100644
--- a/R/data-ipod.R
+++ b/R/data-ipod.R
@@ -1,6 +1,6 @@
 #' Length of songs on an iPod
 #'
-#' A simulated data set on lengths of songs on an iPod.
+#' A simulated dataset on lengths of songs on an iPod.
 #'
 #'
 #' @name ipod
diff --git a/R/data-jury.R b/R/data-jury.R
index 15c44ff3..94d1136f 100644
--- a/R/data-jury.R
+++ b/R/data-jury.R
@@ -1,6 +1,6 @@
-#' Simulated juror data set
+#' Simulated juror dataset
 #'
-#' Simulated data set of registered voters proportions and representation on
+#' Simulated dataset of registered voters proportions and representation on
 #' juries.
 #'
 #'
diff --git a/R/data-loans_full_schema.R b/R/data-loans_full_schema.R
index dcdb81e7..d033633d 100644
--- a/R/data-loans_full_schema.R
+++ b/R/data-loans_full_schema.R
@@ -1,13 +1,13 @@
 #' Loan data from Lending Club
 #'
-#' This data set represents thousands of loans made through the Lending Club
+#' This dataset represents thousands of loans made through the Lending Club
 #' platform, which is a platform that allows individuals to lend to other
 #' individuals. Of course, not all loans are created equal. Someone who is a
 #' essentially a sure bet to pay back a loan will have an easier time getting a
 #' loan with a low interest rate than someone who appears to be riskier. And
 #' for people who are very risky? They may not even get a loan offer, or they
 #' may not have accepted the loan offer due to a high interest rate. It is
-#' important to keep that last part in mind, since this data set only
+#' important to keep that last part in mind, since this dataset only
 #' represents loans actually made, i.e. do not mistake this data for loan
 #' applications!
 #'
diff --git a/R/data-london_murders.R b/R/data-london_murders.R
index 92933da0..38c065d0 100644
--- a/R/data-london_murders.R
+++ b/R/data-london_murders.R
@@ -4,7 +4,7 @@
 #' recorded in the Greater London area by the Metropolitan Police from January
 #' 1, 2006 to September 7, 2011.
 #'
-#' To visualize this data set using a map, see the
+#' To visualize this dataset using a map, see the
 #' \code{\link{london_boroughs}} dataset, which contains the latitude and
 #' longitude of polygons that define the boundaries of the 32 boroughs of
 #' Greater London.
diff --git a/R/data-mammals.R b/R/data-mammals.R
index 81ac1134..bfc7c880 100644
--- a/R/data-mammals.R
+++ b/R/data-mammals.R
@@ -1,6 +1,6 @@
 #' Sleep in Mammals
 #'
-#' This data set includes data for 39 species of mammals distributed over 13
+#' This dataset includes data for 39 species of mammals distributed over 13
 #' orders. The data were used for analyzing the relationship between
 #' constitutional and ecological factors and sleeping in mammals. Two
 #' qualitatively different sleep variables (dreaming and non dreaming) were
diff --git a/R/data-mariokart.R b/R/data-mariokart.R
index 0e726275..0adce947 100644
--- a/R/data-mariokart.R
+++ b/R/data-mariokart.R
@@ -8,10 +8,10 @@
 #' one should do when encountering an outlier: examine the data point and
 #' remove it only if there is a good reason. In these two cases, we can see
 #' from the auction titles that they included other items in their auctions
-#' besides the game, which justifies removing them from the data set.
+#' besides the game, which justifies removing them from the dataset.
 #'
-#' This data set includes all auctions for a full week in October 2009.
-#' Auctions were included in the data set if they satisfied a number of
+#' This dataset includes all auctions for a full week in October 2009.
+#' Auctions were included in the dataset if they satisfied a number of
 #' conditions. (1) They were included in a search for "wii mario kart" on
 #' ebay.com, (2) items were in the Video Games > Games > Nintendo Wii section
 #' of Ebay, (3) the listing was an auction and not exclusively a "Buy it Now"
diff --git a/R/data-military.R b/R/data-military.R
index 6f484565..3a5e3072 100644
--- a/R/data-military.R
+++ b/R/data-military.R
@@ -3,9 +3,9 @@
 #' This dataset contains demographic information on every member of the US
 #' armed forces including gender, race, and rank.
 #'
-#' The branches covered by this data set include the Army, Navy, Air Force, and
-#' Marine Corps.  Demographic information on the Coast Guard is contained in
-#' the original data set but has not been included here.
+#' The branches covered by this dataset include the Army, Navy, Air Force, and
+#' Marine Corps. Demographic information on the Coast Guard is contained in
+#' the original dataset but has not been included here.
 #'
 #' @name military
 #' @docType data
diff --git a/R/data-mlb_teams.R b/R/data-mlb_teams.R
index 8e0c73f2..602aac33 100644
--- a/R/data-mlb_teams.R
+++ b/R/data-mlb_teams.R
@@ -1,7 +1,7 @@
 #' Major League Baseball Teams Data.
 #'
 #' A subset of data on Major League Baseball teams from
-#' Lahman's Baseball Database. The full data set is available
+#' Lahman's Baseball Database. The full dataset is available
 #' in the [Lahman R package](https://github.com/cdalzell/Lahman).
 #'
 #' @name mlb_teams
diff --git a/R/data-movies.R b/R/data-movies.R
index 9d680c07..5b4aeba7 100644
--- a/R/data-movies.R
+++ b/R/data-movies.R
@@ -1,6 +1,6 @@
 #' movies
 #'
-#' A data set with information about movies released in 2003.
+#' A dataset with information about movies released in 2003.
 #'
 #' @name movies
 #' @docType data
diff --git a/R/data-mtl.R b/R/data-mtl.R
index 93e019b7..59702369 100644
--- a/R/data-mtl.R
+++ b/R/data-mtl.R
@@ -48,7 +48,7 @@
 #' \doi{10.1371/journal.pone.0195549}.
 #'
 #' Thank you to Professor Silas Bergen of Winona State University for pointing
-#' us to this data set!
+#' us to this dataset!
 #' @keywords datasets
 #' @examples
 #'
diff --git a/R/data-nba_finals.R b/R/data-nba_finals.R
index ffdfd671..9cdd51a9 100644
--- a/R/data-nba_finals.R
+++ b/R/data-nba_finals.R
@@ -1,6 +1,6 @@
 #' NBA Finals History
 #'
-#' This data set contains information about the teams who played in the NBA Finals from 1950 - 2022.
+#' This dataset contains information about the teams who played in the NBA Finals from 1950 - 2022.
 #'
 #' @name nba_finals
 #' @docType data
diff --git a/R/data-nba_finals_teams.R b/R/data-nba_finals_teams.R
index 55e52f62..6769a6d3 100644
--- a/R/data-nba_finals_teams.R
+++ b/R/data-nba_finals_teams.R
@@ -1,6 +1,6 @@
 #' NBA Finals Team Summary
 #'
-#' A data set with individual team summaries for the NBA Finals series from 1950 to 2022. To win the Finals, a team must win 4 games. The maximum number of games in a series is 7.
+#' A dataset with individual team summaries for the NBA Finals series from 1950 to 2022. To win the Finals, a team must win 4 games. The maximum number of games in a series is 7.
 #'
 #' Notes:
 #' 1. The Chicago Stags folded in 1950, the Washington Capitols in 1951 and the Baltimore Bullets in 1954.
diff --git a/R/data-ncbirths.R b/R/data-ncbirths.R
index 1ec27c11..aa588172 100644
--- a/R/data-ncbirths.R
+++ b/R/data-ncbirths.R
@@ -1,10 +1,10 @@
 #' North Carolina births, 1000 cases
 #'
-#' In 2004, the state of North Carolina released to the public a large data set
-#' containing information on births recorded in this state. This data set has
+#' In 2004, the state of North Carolina released to the public a large dataset
+#' containing information on births recorded in this state. This dataset has
 #' been of interest to medical researchers who are studying the relation
 #' between habits and practices of expectant mothers and the birth of their
-#' children. This is a random sample of 1,000 cases from this data set.
+#' children. This is a random sample of 1,000 cases from this dataset.
 #'
 #' @name ncbirths
 #' @docType data
diff --git a/R/data-nyc.R b/R/data-nyc.R
index 9261160a..bbe7dd98 100644
--- a/R/data-nyc.R
+++ b/R/data-nyc.R
@@ -1,6 +1,8 @@
 #' nyc
 #'
-#' Zagat is a public survey where anyone can provide scores to a restaurant. The scores from the general public are then gathered to produce ratings. This data set contains a list of 168 NYC restaurants and their Zagat Ratings.
+#' Zagat is a public survey where anyone can provide scores to a restaurant.
+#' The scores from the general public are then gathered to produce ratings.
+#' This dataset contains a list of 168 NYC restaurants and their Zagat Ratings.
 #'
 #' For each category the scales are as follows:
 #'
diff --git a/R/data-outliers.R b/R/data-outliers.R
index 2033d3d5..8b18838f 100644
--- a/R/data-outliers.R
+++ b/R/data-outliers.R
@@ -1,4 +1,4 @@
-#' Simulated data sets for different types of outliers
+#' Simulated datasets for different types of outliers
 #'
 #' Data sets for showing different types of outliers
 #'
diff --git a/R/data-pew_energy_2018.R b/R/data-pew_energy_2018.R
index 19ac4c30..0fdcf010 100644
--- a/R/data-pew_energy_2018.R
+++ b/R/data-pew_energy_2018.R
@@ -4,7 +4,7 @@
 #' including solar, wind, offshore drilling, hydrolic fracturing ("fracking"),
 #' coal, and nuclear.
 #'
-#' We did not have access to individual responses in original data set, so we
+#' We did not have access to individual responses in the original dataset, so we
 #' took the published percentages and backed out the breakdown
 #'
 #' @name pew_energy_2018
diff --git a/R/data-photo_classify.R b/R/data-photo_classify.R
index 3fcf3df0..1f905ce6 100644
--- a/R/data-photo_classify.R
+++ b/R/data-photo_classify.R
@@ -1,6 +1,6 @@
 #' Photo classifications: fashion or not
 #'
-#' This is a simulated data set for photo classifications based on a machine
+#' This is a simulated dataset for photo classifications based on a machine
 #' learning algorithm versus what the true classification is for those photos.
 #' While the data are not real, they resemble performance that would be
 #' reasonable to expect in a well-built classifier.
diff --git a/R/data-piracy.R b/R/data-piracy.R
index 602ab8c2..e5eaf2c9 100644
--- a/R/data-piracy.R
+++ b/R/data-piracy.R
@@ -1,6 +1,6 @@
 #' Piracy and PIPA/SOPA
 #'
-#' This data set contains observations on all 100 US Senators and 434 of the
+#' This dataset contains observations on all 100 US Senators and 434 of the
 #' 325 US Congressional Representatives related to their support of anti-piracy
 #' legislation that was introduced at the end of 2011.
 #'
@@ -9,7 +9,7 @@
 #' US Senate, respectively, to curtail copyright infringement.  The bill was
 #' controversial because there were concerns the bill limited free speech
 #' rights.  ProPublica, the independent and non-profit news organization,
-#' compiled this data set to compare the stance of legislators towards the
+#' compiled this dataset to compare the stance of legislators towards the
 #' bills with the amount of campaign funds that they received from groups
 #' considered to be supportive of or in opposition to the legislation.
 #'
diff --git a/R/data-playing_cards.R b/R/data-playing_cards.R
index ba3f1648..c60d238d 100644
--- a/R/data-playing_cards.R
+++ b/R/data-playing_cards.R
@@ -11,7 +11,7 @@
 #'   \item{suit}{Card suit, which takes one of four values: \code{Club}, \code{Diamond}, \code{Heart}, or \code{Spade}.}
 #'   \item{face_card}{Whether the card counts as a face card.}
 #' }
-#' @source This extremely complex data set was generated from scratch.
+#' @source This extremely complex dataset was generated from scratch.
 #' @keywords datasets
 #' @examples
 #'
diff --git a/R/data-possum.R b/R/data-possum.R
index 7d26b51a..db5e34c7 100644
--- a/R/data-possum.R
+++ b/R/data-possum.R
@@ -1,7 +1,7 @@
 #' Possums in Australia and New Guinea
 #'
 #' Data representing possums in Australia and New Guinea. This is a copy of the
-#' data set by the same name in the `DAAG` package, however, the data set
+#' dataset by the same name in the `DAAG` package; however, the dataset
 #' included here includes fewer variables.
 #'
 #' @name possum
diff --git a/R/data-race_justice.R b/R/data-race_justice.R
index 13aa8e31..00abd04f 100644
--- a/R/data-race_justice.R
+++ b/R/data-race_justice.R
@@ -3,7 +3,7 @@
 #' Results from a Yahoo! News poll conducted by YouGov on May 29-31, 2020.
 #' In total 1060 U.S. adults were asked a series of questions regarding race and
 #' justice in the wake of the killing of George Floyd by a police officer.
-#' Results in this data set are percentages for the question, "Do you think
+#' Results in this dataset are percentages for the question, "Do you think
 #' Blacks and Whites receive equal treatment from the police?" For this
 #' particular question there were 1059 respondents.
 #'
diff --git a/R/data-rosling_responses.R b/R/data-rosling_responses.R
index 9da8d816..19c2c846 100644
--- a/R/data-rosling_responses.R
+++ b/R/data-rosling_responses.R
@@ -1,7 +1,7 @@
 #' Sample Responses to Two Public Health Questions
 #'
 #' Public health has improved and evolved, but has the public's knowledge
-#' changed with it? This data set explores sample responses for two survey
+#' changed with it? This dataset explores sample responses for two survey
 #' questions posed by Hans Rosling during lectures to a wide array of
 #' well-educated audiences.
 #'
diff --git a/R/data-russian_influence_on_us_election_2016.R b/R/data-russian_influence_on_us_election_2016.R
index f594689a..cb73e008 100644
--- a/R/data-russian_influence_on_us_election_2016.R
+++ b/R/data-russian_influence_on_us_election_2016.R
@@ -6,7 +6,7 @@
 #'
 #' The actual sample size was 1000. However, the original data were not from a
 #' simple random sample; after accounting for the design, the equivalent sample
-#' size was 506, which was what was used for the data set here to keep things
+#' size was 506, which was what was used for the dataset here to keep things
 #' simpler for intro stat analyses.
 #'
 #' @name russian_influence_on_us_election_2016
diff --git a/R/data-simulated_dist.R b/R/data-simulated_dist.R
index b40d6c6a..c82065b8 100644
--- a/R/data-simulated_dist.R
+++ b/R/data-simulated_dist.R
@@ -1,4 +1,4 @@
-#' Simulated data sets, not necessarily drawn from a normal distribution.
+#' Simulated datasets, not necessarily drawn from a normal distribution.
 #'
 #' Data were simulated in R, and some of the simulations do not represent data
 #' from actual normal distributions.
@@ -6,10 +6,10 @@
 #'
 #' @name simulated_dist
 #' @docType data
-#' @format The format is: List of 4 $ d1: data set of 100 observations.  $ d2:
-#' data set of 50 observations.  $ d3: num data set of 500 observations.  $ d4:
-#' data set of 15 observations.  $ d5: num data set of 25 observations.  $ d6:
-#' data set of 50 observations.
+#' @format The format is: List of 4 $ d1: dataset of 100 observations.  $ d2:
+#' dataset of 50 observations.  $ d3: num dataset of 500 observations.  $ d4:
+#' dataset of 15 observations.  $ d5: num dataset of 25 observations.  $ d6:
+#' dataset of 50 observations.
 #' @keywords datasets
 #' @examples
 #'
diff --git a/R/data-simulated_normal.R b/R/data-simulated_normal.R
index d13b37a6..01a96781 100644
--- a/R/data-simulated_normal.R
+++ b/R/data-simulated_normal.R
@@ -1,4 +1,4 @@
-#' Simulated data sets, drawn from a normal distribution.
+#' Simulated datasets, drawn from a normal distribution.
 #'
 #' Data were simulated using \code{\link{rnorm}}.
 #'
diff --git a/R/data-smoking.R b/R/data-smoking.R
index 7d7577f4..b4375672 100644
--- a/R/data-smoking.R
+++ b/R/data-smoking.R
@@ -1,6 +1,6 @@
 #' UK Smoking Data
 #'
-#' Survey data on smoking habits from the UK. The data set can be used for
+#' Survey data on smoking habits from the UK. The dataset can be used for
 #' analyzing the demographic characteristics of smokers and types of tobacco
 #' consumed.
 #'
diff --git a/R/data-soda.R b/R/data-soda.R
index 39db9140..a30dd685 100644
--- a/R/data-soda.R
+++ b/R/data-soda.R
@@ -1,6 +1,6 @@
 #' soda
 #'
-#' A randomly generated data set of soda preference (cola or orange) based on location.
+#' A randomly generated dataset of soda preference (cola or orange) based on location.
 #'
 #' @name soda
 #' @docType data
diff --git a/R/data-student_sleep.R b/R/data-student_sleep.R
index 2986a223..7eeb2460 100644
--- a/R/data-student_sleep.R
+++ b/R/data-student_sleep.R
@@ -1,6 +1,6 @@
 #' Sleep for 110 students (simulated)
 #'
-#' A simulated data set for how much 110 college students each slept in a
+#' A simulated dataset for how much 110 college students each slept in a
 #' single night.
 #'
 #'
diff --git a/R/data-teacher.R b/R/data-teacher.R
index bd7362df..375d1e83 100644
--- a/R/data-teacher.R
+++ b/R/data-teacher.R
@@ -1,6 +1,6 @@
 #' Teacher Salaries in St. Louis, Michigan
 #'
-#' This data set contains teacher salaries from 2009-2010 for 71 teachers
+#' This dataset contains teacher salaries from 2009-2010 for 71 teachers
 #' employed by the St. Louis Public School in Michigan, as well as several
 #' covariates.
 #'
diff --git a/R/data-tips.R b/R/data-tips.R
index 483fffcb..5043fb17 100644
--- a/R/data-tips.R
+++ b/R/data-tips.R
@@ -1,10 +1,10 @@
 #' Tip data
 #'
-#' A simulated data set of tips over a few weeks on a couple days per week.
+#' A simulated dataset of tips over a few weeks on a couple days per week.
 #' Each tip is associated with a single group, which may include several bills
 #' and tables (i.e. groups paid in one lump sum in simulations).
 #'
-#' This data set was built using simulations of tables, then bills, then tips
+#' This dataset was built using simulations of tables, then bills, then tips
 #' based on the bills. Large groups were assumed to only pay the gratuity,
 #' which is evident in the data. Tips were set to be plausible round values;
 #' they were often (but not always) rounded to dollars, quarters, etc.
@@ -19,7 +19,7 @@
 #'   \item{bill}{Total bill for the group.}
 #'   \item{tip}{Total tip from the group.}
 #'   }
-#' @source Simulated data set.
+#' @source Simulated dataset.
 #' @keywords datasets
 #' @examples
 #'
diff --git a/R/data-toohey.R b/R/data-toohey.R
index 66369411..da610863 100644
--- a/R/data-toohey.R
+++ b/R/data-toohey.R
@@ -1,4 +1,4 @@
-#' Simulated polling data set
+#' Simulated polling dataset
 #'
 #' Simulated data for a fake political candidate.
 #'
diff --git a/R/data-toy_anova.R b/R/data-toy_anova.R
index 86b93f34..0fa56477 100644
--- a/R/data-toy_anova.R
+++ b/R/data-toy_anova.R
@@ -1,6 +1,6 @@
-#' Simulated data set for ANOVA
+#' Simulated dataset for ANOVA
 #'
-#' Simulated data set for getting a better understanding of intuition that
+#' Simulated dataset for getting a better understanding of intuition that
 #' ANOVA is based off of.
 #'
 #'
diff --git a/R/data-ucla_textbooks_f18.R b/R/data-ucla_textbooks_f18.R
index 3b50048f..f690b48c 100644
--- a/R/data-ucla_textbooks_f18.R
+++ b/R/data-ucla_textbooks_f18.R
@@ -4,7 +4,7 @@
 #' corresponding textbook prices were collected from the UCLA bookstore and
 #' also from Amazon.
 #'
-#' A past data set was collected from UCLA courses in Spring 2010, and Amazon
+#' A past dataset was collected from UCLA courses in Spring 2010, and Amazon
 #' at that time was found to be almost uniformly lower than those of the UCLA
 #' bookstore's.  Now in 2018, the UCLA bookstore is about even with Amazon on
 #' the vast majority of titles, and there is no statistical difference in the
diff --git a/R/data-ukdemo.R b/R/data-ukdemo.R
index dc8f7751..62893399 100644
--- a/R/data-ukdemo.R
+++ b/R/data-ukdemo.R
@@ -1,6 +1,6 @@
 #' United Kingdom Demographic Data
 #'
-#' This data set comes from the Guardian's Data Blog and includes five
+#' This dataset comes from the Guardian's Data Blog and includes five
 #' financial demographic variables.
 #'
 #'
diff --git a/R/data-unempl.R b/R/data-unempl.R
index 738334cb..d3035598 100644
--- a/R/data-unempl.R
+++ b/R/data-unempl.R
@@ -1,6 +1,6 @@
 #' Annual unemployment since 1890
 #'
-#' A compilation of two data sets that provides an estimate of unemployment
+#' A compilation of two datasets that provides an estimate of unemployment
 #' from 1890 to 2010.
 #'
 #'
diff --git a/R/data-yrbss_samp.R b/R/data-yrbss_samp.R
index 652bc46c..acf7ba1e 100644
--- a/R/data-yrbss_samp.R
+++ b/R/data-yrbss_samp.R
@@ -1,6 +1,6 @@
 #' Sample of Youth Risk Behavior Surveillance System (YRBSS)
 #'
-#' A sample of the \code{\link{yrbss}} data set.
+#' A sample of the \code{\link{yrbss}} dataset.
 #'
 #'
 #' @name yrbss_samp
diff --git a/R/write_pkg_data.R b/R/write_pkg_data.R
index 78ed10b7..b72e0ce2 100644
--- a/R/write_pkg_data.R
+++ b/R/write_pkg_data.R
@@ -2,7 +2,7 @@
 #'
 #' The function should be run with a path to a package directory.
 #' It will then look through the `data` directory of the package,
-#' and for all data sets that are data frames, create CSV variants
+#' and for all datasets that are data frames, create CSV variants
 #' in a `data-csv` directory.
 #'
 #' @param pkg The R package where we'd like to generate CSVs of any
@@ -27,7 +27,7 @@ write_pkg_data <- function(pkg, dir = paste0("data-", out_type), overwrite = FAL
   stopifnot(dir != "")
   data_sets <- utils::data(package = pkg)$results[, 3]
   if ("tmp_data" %in% data_sets) {
-    warning("Data set `tmp_data` was omitted.")
+    warning("Dataset `tmp_data` was omitted.")
     data_sets <- data_sets[data_sets != "tmp_data"]
   }
   # If the directory path ends in a /, remove it.
@@ -95,10 +95,10 @@ write_pkg_data <- function(pkg, dir = paste0("data-", out_type), overwrite = FAL
   skipped_listed <-
     ifelse(skipped_listed == "  - ", "", skipped_listed)
   message(
-    "\n\n- ", written, " data sets were written.\n",
-    "- ", overwrite_skip, " data sets were skipped, because they already ",
+    "\n\n- ", written, " datasets were written.\n",
+    "- ", overwrite_skip, " datasets were skipped, because they already ",
     "existed.\n", overwrite_skip_listed,
-    "- ", skipped, " data sets were not matrices or data frames and so ",
+    "- ", skipped, " datasets were not matrices or data frames and so ",
     "could not be written.\n", skipped_listed
   )
 }
diff --git a/README.Rmd b/README.Rmd
index 456edd68..6e601176 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -21,7 +21,7 @@ knitr::opts_chunk$set(
 <!-- badges: end -->
 
 Supplemental functions and data for OpenIntro resources, which includes  open-source textbooks and resources for introductory statistics at [openintro.org](https://www.openintro.org/). 
-The package contains data sets used in our open-source textbooks along with custom plotting functions for reproducing book figures. 
+The package contains datasets used in our open-source textbooks along with custom plotting functions for reproducing book figures. 
 The package also contains the datasets used in OpenIntro labs. 
 Note that many functions and examples include color transparency; some plotting elements may not show up properly (or at all) when run in some versions of Windows operating system.
 
diff --git a/README.md b/README.md
index 051941d6..08802fda 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,8 @@ downloads](http://cranlogs.r-pkg.org/badges/openintro)](https://cran.r-project.o
 
 Supplemental functions and data for OpenIntro resources, which includes
 open-source textbooks and resources for introductory statistics at
-[openintro.org](https://www.openintro.org/). The package contains data
-sets used in our open-source textbooks along with custom plotting
+[openintro.org](https://www.openintro.org/). The package contains
+datasets used in our open-source textbooks along with custom plotting
 functions for reproducing book figures. The package also contains the
 datasets used in OpenIntro labs. Note that many functions and examples
 include color transparency; some plotting elements may not show up
@@ -73,14 +73,14 @@ Process for adding new data to the package
 
 The following steps use the **devtools** and **usethis** packages for
 various steps. We recommend using this process when suggesting new
-datasets to be added to the package. If the dataset is large (>500MB) or
-you’d like to add a function, please open an issue first for discussion
-before making the pull request.
+datasets to be added to the package. If the dataset is large (\>500MB)
+or you’d like to add a function, please open an issue first for
+discussion before making the pull request.
 
 1.  Fork and clone the repo with
     `usethis::create_from_github("OpenIntroStat/openintro")`
-    -   Note: If you have write access to the repo, you can skip this
-        step.
+    - Note: If you have write access to the repo, you can skip this
+      step.
 2.  Start a new pull request with `usethis::pr_init("BRANCH-NAME")`,
     where `BRANCH-NAME` is an informative branch name.
 3.  If adding a file that is not an .rda file to begin with (Excel, csv,
diff --git a/data-raw/simpsons_paradox_covid/simpsons_paradox_covid-dataprep.R b/data-raw/simpsons_paradox_covid/simpsons_paradox_covid-dataprep.R
index f745afb4..d532879e 100644
--- a/data-raw/simpsons_paradox_covid/simpsons_paradox_covid-dataprep.R
+++ b/data-raw/simpsons_paradox_covid/simpsons_paradox_covid-dataprep.R
@@ -5,8 +5,9 @@ library(tidyr)
 
 # Load data ------------------------------------------------------------------
 
-### Create a folder in the "data-raw" folder with the same name as the data
-### set. Place the raw data file (.csv, .tsv, .xlsx, etc) here, then adjust ### the code below to match your data set.
+### Create a folder in the "data-raw" folder with the same name as the dataset.
+### Place the raw data file (.csv, .tsv, .xlsx, etc) here, then adjust
+### the code below to match your dataset.
 
 raw_data <- read_csv(here::here("data-raw/simpsons_paradox_covid/simpsons_paradox.csv"))
 
diff --git a/man/absenteeism.Rd b/man/absenteeism.Rd
index 84a28e4f..e98dbebb 100644
--- a/man/absenteeism.Rd
+++ b/man/absenteeism.Rd
@@ -19,7 +19,7 @@ slow learner (\code{SL}).}
 Venables WN, Ripley BD. 2002. Modern Applied Statistics with S.
 Fourth Edition. New York: Springer.
 
-Data can also be found in the R \code{MASS} package under the data set name
+Data can also be found in the R \code{MASS} package under the dataset name
 \code{quine}.
 }
 \usage{
diff --git a/man/ami_occurrences.Rd b/man/ami_occurrences.Rd
index 7402795f..baac66c0 100644
--- a/man/ami_occurrences.Rd
+++ b/man/ami_occurrences.Rd
@@ -14,7 +14,7 @@ A data frame with 365 observations on the following variable.
 ami_occurrences
 }
 \description{
-This data set is simulated but contains realistic occurrences of AMI in NY
+This dataset is simulated but contains realistic occurrences of AMI in NY
 City.
 }
 \examples{
diff --git a/man/arbuthnot.Rd b/man/arbuthnot.Rd
index 3014a1b8..4b757cb0 100644
--- a/man/arbuthnot.Rd
+++ b/man/arbuthnot.Rd
@@ -13,7 +13,7 @@ A tbl_df with with 82 rows and 3 variables:
 }
 }
 \source{
-These data are excerpted from the \code{Arbuthnot} data set in the
+These data are excerpted from the \code{Arbuthnot} dataset in the
 \href{https://CRAN.R-project.org/package=HistData}{HistData} package.
 }
 \usage{
diff --git a/man/association.Rd b/man/association.Rd
index 3c4da57a..7b6ee607 100644
--- a/man/association.Rd
+++ b/man/association.Rd
@@ -31,7 +31,7 @@ A data frame with 121 observations on the following 4 variables.
 association
 }
 \description{
-Simulated data set.
+Simulated dataset.
 }
 \examples{
 
diff --git a/man/ball_bearing.Rd b/man/ball_bearing.Rd
index 42fbe5d5..f4fac7dd 100644
--- a/man/ball_bearing.Rd
+++ b/man/ball_bearing.Rd
@@ -19,7 +19,7 @@ Simulated data.
 ball_bearing
 }
 \description{
-A simulated data set on lifespan of ball bearings.
+A simulated dataset on lifespan of ball bearings.
 }
 \examples{
 
diff --git a/man/births14.Rd b/man/births14.Rd
index 8baa1855..25ceb229 100644
--- a/man/births14.Rd
+++ b/man/births14.Rd
@@ -34,11 +34,11 @@ Inter-university Consortium for Political and Social Research, 2016-10-07.
 births14
 }
 \description{
-Every year, the US releases to the public a large data set containing
-information on births recorded in the country. This data set has been of
+Every year, the US releases to the public a large dataset containing
+information on births recorded in the country. This dataset has been of
 interest to medical researchers who are studying the relation between habits
 and practices of expectant mothers and the birth of their children. This is a
-random sample of 1,000 cases from the data set released in 2014.
+random sample of 1,000 cases from the dataset released in 2014.
 }
 \examples{
 
diff --git a/man/books.Rd b/man/books.Rd
index c122a163..1e307f44 100644
--- a/man/books.Rd
+++ b/man/books.Rd
@@ -15,7 +15,7 @@ A data frame with 95 observations on the following 2 variables.
 books
 }
 \description{
-Simulated data set.
+Simulated dataset.
 }
 \examples{
 
diff --git a/man/buildAxis.Rd b/man/buildAxis.Rd
index e1087b1d..d20b9eb2 100644
--- a/man/buildAxis.Rd
+++ b/man/buildAxis.Rd
@@ -9,7 +9,7 @@ buildAxis(side, limits, n, nMin = 2, nMax = 10, extend = 2, eps = 10^-12, ...)
 \arguments{
 \item{side}{The side of the plot where to add the axis.}
 
-\item{limits}{Either lower and upper limits on the axis or a data set.}
+\item{limits}{Either lower and upper limits on the axis or a dataset.}
 
 \item{n}{The preferred number of axis labels.}
 
@@ -32,8 +32,8 @@ of labels on the axis. This function is still under development.
 }
 \details{
 The primary reason behind building this function was to allow a plot to be
-created with similar features but with different data sets. For instance, if
-a set of code was written for one data set and the function \code{axis} had
+created with similar features but with different datasets. For instance, if
+a set of code was written for one dataset and the function \code{axis} had
 been utilized with pre-specified values, the axis may not match the plot of
 a new set of data. The function \code{buildAxis} addresses this problem by
 allowing the number of axis labels to be specified and controlled.
diff --git a/man/cars93.Rd b/man/cars93.Rd
index 7b42ba07..33544944 100644
--- a/man/cars93.Rd
+++ b/man/cars93.Rd
@@ -26,7 +26,7 @@ cars93
 }
 \description{
 A data frame with 54 rows and 6 columns. This data is a subset of the
-\code{Cars93} data set from the \code{MASS} package.
+\code{Cars93} dataset from the \code{MASS} package.
 }
 \details{
 These cars represent a random sample for 1993 models that were in both
diff --git a/man/children_gender_stereo.Rd b/man/children_gender_stereo.Rd
index 9e01c3f2..97390fc0 100644
--- a/man/children_gender_stereo.Rd
+++ b/man/children_gender_stereo.Rd
@@ -11,7 +11,7 @@ the referenced paper, and these data frames each have variables (columns)
 that are among the following:
 \describe{
 \item{subject}{Subject ID. Note that Subject 1 in the first data frame
-(data set) does \bold{not} correspond to Subject 1 in the second data frame.}
+(dataset) does \bold{not} correspond to Subject 1 in the second data frame.}
 \item{gender}{Gender of the subject.}
 \item{age}{Age of the subject, in years.}
 \item{trait}{The trait that the children were making a judgement about,
@@ -50,7 +50,7 @@ children_gender_stereo
 \description{
 Stereotypes are common, but at what age do they start? This study
 investigates stereotypes in young children aged 5-7 years old. There are
-four studies reported in the paper, and all four data sets are provided here.
+four studies reported in the paper, and all four datasets are provided here.
 }
 \details{
 The structure of the data object is a little unusual, so we recommend
@@ -65,7 +65,7 @@ here.
 }
 \examples{
 
-# This data set is a little funny to work with.
+# This dataset is a little funny to work with.
 # If wanting to review the data for a study, we
 # recommend first assigning the corresponding
 # data frame to a new variable. For instance,
diff --git a/man/climate70.Rd b/man/climate70.Rd
index ec30b38f..f2b58e78 100644
--- a/man/climate70.Rd
+++ b/man/climate70.Rd
@@ -37,7 +37,7 @@ information for those years.
 # Data sampled are from the US, Europe, and Australia.
 # This geographic limitation may be due to the particular
 # years considered, since locations without both 1948 and
-# 2018 were discarded for this (simple) data set.
+# 2018 were discarded for this (simple) dataset.
 plot(climate70$longitude, climate70$latitude)
 
 plot(climate70$dx70_1948, climate70$dx70_2018)
diff --git a/man/corr_match.Rd b/man/corr_match.Rd
index 07378ba0..0ca79cdc 100644
--- a/man/corr_match.Rd
+++ b/man/corr_match.Rd
@@ -4,7 +4,7 @@
 \name{corr_match}
 \alias{corr_match}
 \alias{corr.match}
-\title{Sample data sets for correlation problems}
+\title{Sample datasets for correlation problems}
 \format{
 A data frame with 121 observations on the following 9 variables.
 \describe{
@@ -20,7 +20,7 @@ A data frame with 121 observations on the following 9 variables.
 }
 }
 \source{
-Simulated data set.
+Simulated dataset.
 }
 \usage{
 corr_match
diff --git a/man/cpr.Rd b/man/cpr.Rd
index c9a62319..256b41f7 100644
--- a/man/cpr.Rd
+++ b/man/cpr.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{cpr}
 \alias{cpr}
-\title{CPR data set}
+\title{CPR dataset}
 \format{
 A data frame with 90 observations on the following 2 variables.
 \describe{
diff --git a/man/credits.Rd b/man/credits.Rd
index c0d5b626..0e352e04 100644
--- a/man/credits.Rd
+++ b/man/credits.Rd
@@ -17,7 +17,7 @@ Simulated data.
 credits
 }
 \description{
-A simulated data set of number of credits taken by college students each
+A simulated dataset of number of credits taken by college students each
 semester.
 }
 \examples{
diff --git a/man/drone_blades.Rd b/man/drone_blades.Rd
index 258ebd69..f65395ab 100644
--- a/man/drone_blades.Rd
+++ b/man/drone_blades.Rd
@@ -15,7 +15,7 @@ A data frame with 2000 observations on the following 2 variables.
 drone_blades
 }
 \description{
-Quality control data set for quadcopter drone blades, where this data has
+Quality control dataset for quadcopter drone blades, where this data has
 been made up for an example.
 }
 \examples{
diff --git a/man/email50.Rd b/man/email50.Rd
index ffda4657..258ddf8d 100644
--- a/man/email50.Rd
+++ b/man/email50.Rd
@@ -37,7 +37,7 @@ David Diez's Gmail Account, early months of 2012. All personally identifiable in
 email50
 }
 \description{
-This is a subsample of the \code{\link{email}} data set.
+This is a subsample of the \code{\link{email}} dataset.
 }
 \examples{
 
diff --git a/man/env_regulation.Rd b/man/env_regulation.Rd
index 80495585..e39f36e7 100644
--- a/man/env_regulation.Rd
+++ b/man/env_regulation.Rd
@@ -32,7 +32,7 @@ government regulations.
 
 The actual sample size was 1012. However, the original data were not from a
 simple random sample; after accounting for the design, the equivalent sample
-size was about 705, which was what was used for the data set here to keep
+size was about 705, which was what was used for the dataset here to keep
 things simpler for intro stat analyses.
 }
 \examples{
diff --git a/man/esi.Rd b/man/esi.Rd
index e2d987c0..eb075e90 100644
--- a/man/esi.Rd
+++ b/man/esi.Rd
@@ -53,7 +53,7 @@ Available at \url{https://www.earth.columbia.edu/news/2005/images/ESI2005_policy
 esi
 }
 \description{
-This data set comes from the 2005 Environmental Sustainability Index:
+This dataset comes from the 2005 Environmental Sustainability Index:
 Benchmarking National Environmental Stewardship.  Countries are given an
 overall sustainability score as well as scores in each of several different
 environmental areas.
diff --git a/man/family_college.Rd b/man/family_college.Rd
index b57dbc93..a3e46a88 100644
--- a/man/family_college.Rd
+++ b/man/family_college.Rd
@@ -19,7 +19,7 @@ Simulation based off of summary information provided at
 family_college
 }
 \description{
-A simulated data set based on real population summaries.
+A simulated dataset based on real population summaries.
 }
 \examples{
 
diff --git a/man/friday.Rd b/man/friday.Rd
index a7c5949c..bffc4d77 100644
--- a/man/friday.Rd
+++ b/man/friday.Rd
@@ -25,7 +25,7 @@ Friday the 13th Bad For Your Health?," BMJ, 307, 1584-1586.
 friday
 }
 \description{
-This data set addresses issues of how superstitions regarding Friday the
+This dataset addresses issues of how superstitions regarding Friday the
 13th affect human behavior, and whether Friday the 13th is an unlucky day.
 Scanlon, et al. collected data on traffic and shopping patterns and accident
 frequency for Fridays the 6th and 13th between October of 1989 and November
diff --git a/man/gradestv.Rd b/man/gradestv.Rd
index 4d900ee7..30d337e6 100644
--- a/man/gradestv.Rd
+++ b/man/gradestv.Rd
@@ -18,12 +18,12 @@ Simulated data
 gradestv
 }
 \description{
-This is a simulated data set to be used to estimate the relationship between
+This is a simulated dataset to be used to estimate the relationship between
 number of hours per week students watch TV and the grade they got in a
 statistics class.
 }
 \details{
-There are a few potential outliers in this data set. When analyzing the data
+There are a few potential outliers in this dataset. When analyzing the data
 one should consider how (if at all) these outliers may affect the estimates
 of correlation coefficient and regression parameters.
 }
diff --git a/man/housing.Rd b/man/housing.Rd
index d686d7b6..ebd006a0 100644
--- a/man/housing.Rd
+++ b/man/housing.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{housing}
 \alias{housing}
-\title{Simulated data set on student housing}
+\title{Simulated dataset on student housing}
 \format{
 A data frame with 75 observations on the following variable.
 \describe{\item{cost}{a numeric vector} }
diff --git a/man/ipod.Rd b/man/ipod.Rd
index 9545e514..05520332 100644
--- a/man/ipod.Rd
+++ b/man/ipod.Rd
@@ -17,7 +17,7 @@ Simulated data.
 ipod
 }
 \description{
-A simulated data set on lengths of songs on an iPod.
+A simulated dataset on lengths of songs on an iPod.
 }
 \examples{
 
diff --git a/man/jury.Rd b/man/jury.Rd
index f556eafe..fdf9f37a 100644
--- a/man/jury.Rd
+++ b/man/jury.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{jury}
 \alias{jury}
-\title{Simulated juror data set}
+\title{Simulated juror dataset}
 \format{
 A data frame with 275 observations on the following variable.
 \describe{\item{race}{a factor with levels \code{black} \code{hispanic}
@@ -13,7 +13,7 @@ A data frame with 275 observations on the following variable.
 jury
 }
 \description{
-Simulated data set of registered voters proportions and representation on
+Simulated dataset of registered voters proportions and representation on
 juries.
 }
 \examples{
diff --git a/man/loans_full_schema.Rd b/man/loans_full_schema.Rd
index 6fd7f0bc..8b54bf4a 100644
--- a/man/loans_full_schema.Rd
+++ b/man/loans_full_schema.Rd
@@ -123,14 +123,14 @@ through their platform.
 loans_full_schema
 }
 \description{
-This data set represents thousands of loans made through the Lending Club
+This dataset represents thousands of loans made through the Lending Club
 platform, which is a platform that allows individuals to lend to other
 individuals. Of course, not all loans are created equal. Someone who is a
 essentially a sure bet to pay back a loan will have an easier time getting a
 loan with a low interest rate than someone who appears to be riskier. And
 for people who are very risky? They may not even get a loan offer, or they
 may not have accepted the loan offer due to a high interest rate. It is
-important to keep that last part in mind, since this data set only
+important to keep that last part in mind, since this dataset only
 represents loans actually made, i.e. do not mistake this data for loan
 applications!
 }
diff --git a/man/london_murders.Rd b/man/london_murders.Rd
index c1808c4d..44749c92 100644
--- a/man/london_murders.Rd
+++ b/man/london_murders.Rd
@@ -26,7 +26,7 @@ recorded in the Greater London area by the Metropolitan Police from January
 1, 2006 to September 7, 2011.
 }
 \details{
-To visualize this data set using a map, see the
+To visualize this dataset using a map, see the
 \code{\link{london_boroughs}} dataset, which contains the latitude and
 longitude of polygons that define the boundaries of the 32 boroughs of
 Greater London.
diff --git a/man/mammals.Rd b/man/mammals.Rd
index 571c6bb1..c7417bfd 100644
--- a/man/mammals.Rd
+++ b/man/mammals.Rd
@@ -31,7 +31,7 @@ faces from other animals. This index is based upon Predation and Exposure. 1
 mammals
 }
 \description{
-This data set includes data for 39 species of mammals distributed over 13
+This dataset includes data for 39 species of mammals distributed over 13
 orders. The data were used for analyzing the relationship between
 constitutional and ecological factors and sleeping in mammals. Two
 qualitatively different sleep variables (dreaming and non dreaming) were
diff --git a/man/mariokart.Rd b/man/mariokart.Rd
index 9222c956..b479f7fa 100644
--- a/man/mariokart.Rd
+++ b/man/mariokart.Rd
@@ -45,10 +45,10 @@ there are two outliers in the data. These serve as a nice example of what
 one should do when encountering an outlier: examine the data point and
 remove it only if there is a good reason. In these two cases, we can see
 from the auction titles that they included other items in their auctions
-besides the game, which justifies removing them from the data set.
+besides the game, which justifies removing them from the dataset.
 
-This data set includes all auctions for a full week in October 2009.
-Auctions were included in the data set if they satisfied a number of
+This dataset includes all auctions for a full week in October 2009.
+Auctions were included in the dataset if they satisfied a number of
 conditions. (1) They were included in a search for "wii mario kart" on
 ebay.com, (2) items were in the Video Games > Games > Nintendo Wii section
 of Ebay, (3) the listing was an auction and not exclusively a "Buy it Now"
diff --git a/man/military.Rd b/man/military.Rd
index aba96bd8..f979db9f 100644
--- a/man/military.Rd
+++ b/man/military.Rd
@@ -28,9 +28,9 @@ This dataset contains demographic information on every member of the US
 armed forces including gender, race, and rank.
 }
 \details{
-The branches covered by this data set include the Army, Navy, Air Force, and
-Marine Corps.  Demographic information on the Coast Guard is contained in
-the original data set but has not been included here.
+The branches covered by this dataset include the Army, Navy, Air Force, and
+Marine Corps. Demographic information on the Coast Guard is contained in
+the original dataset but has not been included here.
 }
 \examples{
 \dontrun{
diff --git a/man/mlb_teams.Rd b/man/mlb_teams.Rd
index 00523bf5..ffc9a945 100644
--- a/man/mlb_teams.Rd
+++ b/man/mlb_teams.Rd
@@ -58,7 +58,7 @@ mlb_teams
 }
 \description{
 A subset of data on Major League Baseball teams from
-Lahman's Baseball Database. The full data set is available
+Lahman's Baseball Database. The full dataset is available
 in the \href{https://github.com/cdalzell/Lahman}{Lahman R package}.
 }
 \examples{
diff --git a/man/movies.Rd b/man/movies.Rd
index ba220b56..ff971a75 100644
--- a/man/movies.Rd
+++ b/man/movies.Rd
@@ -21,7 +21,7 @@ A data frame with 140 observations on the following 5 variables.
 movies
 }
 \description{
-A data set with information about movies released in 2003.
+A dataset with information about movies released in 2003.
 }
 \examples{
 
diff --git a/man/mtl.Rd b/man/mtl.Rd
index f9443d19..6951b3ce 100644
--- a/man/mtl.Rd
+++ b/man/mtl.Rd
@@ -45,7 +45,7 @@ middle-aged and older adults. PLoS ONE 13(4): e0195549.
 \doi{10.1371/journal.pone.0195549}.
 
 Thank you to Professor Silas Bergen of Winona State University for pointing
-us to this data set!
+us to this dataset!
 }
 \usage{
 mtl
diff --git a/man/nba_finals.Rd b/man/nba_finals.Rd
index ff5484b6..87334f21 100644
--- a/man/nba_finals.Rd
+++ b/man/nba_finals.Rd
@@ -25,7 +25,7 @@ A data frame with 73 rows and 9 variables:
 nba_finals
 }
 \description{
-This data set contains information about the teams who played in the NBA Finals from 1950 - 2022.
+This dataset contains information about the teams who played in the NBA Finals from 1950 to 2022.
 }
 \examples{
 library(dplyr)
diff --git a/man/nba_finals_teams.Rd b/man/nba_finals_teams.Rd
index d547a152..03988478 100644
--- a/man/nba_finals_teams.Rd
+++ b/man/nba_finals_teams.Rd
@@ -23,7 +23,7 @@ A data frame with 33 rows and 7 variables:
 nba_finals_teams
 }
 \description{
-A data set with individual team summaries for the NBA Finals series from 1950 to 2022. To win the Finals, a team must win 4 games. The maximum number of games in a series is 7.
+A dataset with individual team summaries for the NBA Finals series from 1950 to 2022. To win the Finals, a team must win 4 games. The maximum number of games in a series is 7.
 }
 \details{
 Notes:
diff --git a/man/ncbirths.Rd b/man/ncbirths.Rd
index 1c2e5088..9449ad0d 100644
--- a/man/ncbirths.Rd
+++ b/man/ncbirths.Rd
@@ -27,11 +27,11 @@ A data frame with 1000 observations on the following 13 variables.
 ncbirths
 }
 \description{
-In 2004, the state of North Carolina released to the public a large data set
-containing information on births recorded in this state. This data set has
+In 2004, the state of North Carolina released to the public a large dataset
+containing information on births recorded in this state. This dataset has
 been of interest to medical researchers who are studying the relation
 between habits and practices of expectant mothers and the birth of their
-children. This is a random sample of 1,000 cases from this data set.
+children. This is a random sample of 1,000 cases from this dataset.
 }
 \examples{
 
diff --git a/man/nyc.Rd b/man/nyc.Rd
index 3505f9b0..5b65a65d 100644
--- a/man/nyc.Rd
+++ b/man/nyc.Rd
@@ -19,7 +19,9 @@ A data frame with 168 observations on the following 6 variables.
 nyc
 }
 \description{
-Zagat is a public survey where anyone can provide scores to a restaurant. The scores from the general public are then gathered to produce ratings. This data set contains a list of 168 NYC restaurants and their Zagat Ratings.
+Zagat is a public survey where anyone can provide scores to a restaurant.
+The scores from the general public are then gathered to produce ratings.
+This dataset contains a list of 168 NYC restaurants and their Zagat Ratings.
 }
 \details{
 For each category the scales are as follows:
diff --git a/man/openintro-package.Rd b/man/openintro-package.Rd
index 9dbd5d8d..ab548521 100644
--- a/man/openintro-package.Rd
+++ b/man/openintro-package.Rd
@@ -4,11 +4,11 @@
 \name{openintro-package}
 \alias{openintro}
 \alias{openintro-package}
-\title{openintro: Data Sets and Supplemental Functions from 'OpenIntro' Textbooks and Labs}
+\title{openintro: Datasets and Supplemental Functions from 'OpenIntro' Textbooks and Labs}
 \description{
 \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
 
-Supplemental functions and data for 'OpenIntro' resources, which includes open-source textbooks and resources for introductory statistics (\url{https://www.openintro.org/}). The package contains data sets used in our open-source textbooks along with custom plotting functions for reproducing book figures. Note that many functions and examples include color transparency; some plotting elements may not show up properly (or at all) when run in some versions of Windows operating system.
+Supplemental functions and data for 'OpenIntro' resources, which include open-source textbooks and resources for introductory statistics (\url{https://www.openintro.org/}). The package contains datasets used in our open-source textbooks along with custom plotting functions for reproducing book figures. Note that many functions and examples include color transparency; some plotting elements may not show up properly (or at all) when run in some versions of the Windows operating system.
 }
 \seealso{
 Useful links:
diff --git a/man/outliers.Rd b/man/outliers.Rd
index eedf51aa..a8056a0a 100644
--- a/man/outliers.Rd
+++ b/man/outliers.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{outliers}
 \alias{outliers}
-\title{Simulated data sets for different types of outliers}
+\title{Simulated datasets for different types of outliers}
 \format{
 A data frame with 50 observations on the following 5 variables.
 \describe{
diff --git a/man/pew_energy_2018.Rd b/man/pew_energy_2018.Rd
index 144934f4..4a3f3b19 100644
--- a/man/pew_energy_2018.Rd
+++ b/man/pew_energy_2018.Rd
@@ -24,7 +24,7 @@ including solar, wind, offshore drilling, hydrolic fracturing ("fracking"),
 coal, and nuclear.
 }
 \details{
-We did not have access to individual responses in original data set, so we
+We did not have access to individual responses in the original dataset, so we
 took the published percentages and backed out the breakdown
 }
 \examples{
diff --git a/man/photo_classify.Rd b/man/photo_classify.Rd
index e0c88305..09f4bf5e 100644
--- a/man/photo_classify.Rd
+++ b/man/photo_classify.Rd
@@ -18,7 +18,7 @@ The data are simulated / hypothetical.
 photo_classify
 }
 \description{
-This is a simulated data set for photo classifications based on a machine
+This is a simulated dataset for photo classifications based on a machine
 learning algorithm versus what the true classification is for those photos.
 While the data are not real, they resemble performance that would be
 reasonable to expect in a well-built classifier.
diff --git a/man/piracy.Rd b/man/piracy.Rd
index 7c66e966..d803b9ca 100644
--- a/man/piracy.Rd
+++ b/man/piracy.Rd
@@ -26,7 +26,7 @@ the legislation were in flux at the time of data collection.
 piracy
 }
 \description{
-This data set contains observations on all 100 US Senators and 434 of the
+This dataset contains observations on all 100 US Senators and 434 of the
 325 US Congressional Representatives related to their support of anti-piracy
 legislation that was introduced at the end of 2011.
 }
@@ -36,7 +36,7 @@ The Stop Online Piracy Act (SOPA) and the Protect Intellectual Property Act
 US Senate, respectively, to curtail copyright infringement.  The bill was
 controversial because there were concerns the bill limited free speech
 rights.  ProPublica, the independent and non-profit news organization,
-compiled this data set to compare the stance of legislators towards the
+compiled this dataset to compare the stance of legislators towards the
 bills with the amount of campaign funds that they received from groups
 considered to be supportive of or in opposition to the legislation.
 
diff --git a/man/playing_cards.Rd b/man/playing_cards.Rd
index 8b99b53c..60a31e04 100644
--- a/man/playing_cards.Rd
+++ b/man/playing_cards.Rd
@@ -13,7 +13,7 @@ A data frame with 52 observations on the following 2 variables.
 }
 }
 \source{
-This extremely complex data set was generated from scratch.
+This extremely complex dataset was generated from scratch.
 }
 \usage{
 playing_cards
diff --git a/man/possum.Rd b/man/possum.Rd
index 570b1502..33270766 100644
--- a/man/possum.Rd
+++ b/man/possum.Rd
@@ -29,7 +29,7 @@ possum
 }
 \description{
 Data representing possums in Australia and New Guinea. This is a copy of the
-data set by the same name in the \code{DAAG} package, however, the data set
+dataset by the same name in the \code{DAAG} package; however, the dataset
 included here includes fewer variables.
 }
 \examples{
diff --git a/man/race_justice.Rd b/man/race_justice.Rd
index 597fcc67..2478f30f 100644
--- a/man/race_justice.Rd
+++ b/man/race_justice.Rd
@@ -22,7 +22,7 @@ race_justice
 Results from a Yahoo! News poll conducted by YouGov on May 29-31, 2020.
 In total 1060 U.S. adults were asked a series of questions regarding race and
 justice in the wake of the killing of George Floyd by a police officer.
-Results in this data set are percentages for the question, "Do you think
+Results in this dataset are percentages for the question, "Do you think
 Blacks and Whites receive equal treatment from the police?" For this
 particular question there were 1059 respondents.
 }
diff --git a/man/rosling_responses.Rd b/man/rosling_responses.Rd
index 684ccabc..3a7e7551 100644
--- a/man/rosling_responses.Rd
+++ b/man/rosling_responses.Rd
@@ -27,7 +27,7 @@ rosling_responses
 }
 \description{
 Public health has improved and evolved, but has the public's knowledge
-changed with it? This data set explores sample responses for two survey
+changed with it? This dataset explores sample responses for two survey
 questions posed by Hans Rosling during lectures to a wide array of
 well-educated audiences.
 }
diff --git a/man/russian_influence_on_us_election_2016.Rd b/man/russian_influence_on_us_election_2016.Rd
index 6623076e..b7829354 100644
--- a/man/russian_influence_on_us_election_2016.Rd
+++ b/man/russian_influence_on_us_election_2016.Rd
@@ -26,7 +26,7 @@ Pew Research.
 \details{
 The actual sample size was 1000. However, the original data were not from a
 simple random sample; after accounting for the design, the equivalent sample
-size was 506, which was what was used for the data set here to keep things
+size was 506, which was what was used for the dataset here to keep things
 simpler for intro stat analyses.
 }
 \examples{
diff --git a/man/simulated_dist.Rd b/man/simulated_dist.Rd
index de5e10bd..b2770d22 100644
--- a/man/simulated_dist.Rd
+++ b/man/simulated_dist.Rd
@@ -3,12 +3,12 @@
 \docType{data}
 \name{simulated_dist}
 \alias{simulated_dist}
-\title{Simulated data sets, not necessarily drawn from a normal distribution.}
+\title{Simulated datasets, not necessarily drawn from a normal distribution.}
 \format{
-The format is: List of 4 $ d1: data set of 100 observations.  $ d2:
-data set of 50 observations.  $ d3: num data set of 500 observations.  $ d4:
-data set of 15 observations.  $ d5: num data set of 25 observations.  $ d6:
-data set of 50 observations.
+The format is: List of 6 $ d1: dataset of 100 observations.  $ d2:
+dataset of 50 observations.  $ d3: num dataset of 500 observations.  $ d4:
+dataset of 15 observations.  $ d5: num dataset of 25 observations.  $ d6:
+dataset of 50 observations.
 }
 \usage{
 simulated_dist
diff --git a/man/simulated_normal.Rd b/man/simulated_normal.Rd
index 29fb8d88..d55fcbf7 100644
--- a/man/simulated_normal.Rd
+++ b/man/simulated_normal.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{simulated_normal}
 \alias{simulated_normal}
-\title{Simulated data sets, drawn from a normal distribution.}
+\title{Simulated datasets, drawn from a normal distribution.}
 \format{
 The format is: List of 3 $ n40 : 40 observations from a standard
 normal distribution.  $ n100: 100 observations from a standard normal
diff --git a/man/smoking.Rd b/man/smoking.Rd
index 15e1928f..85370e80 100644
--- a/man/smoking.Rd
+++ b/man/smoking.Rd
@@ -43,7 +43,7 @@ National STEM Centre, Large Datasets from stats4schools,
 smoking
 }
 \description{
-Survey data on smoking habits from the UK. The data set can be used for
+Survey data on smoking habits from the UK. The dataset can be used for
 analyzing the demographic characteristics of smokers and types of tobacco
 consumed.
 }
diff --git a/man/soda.Rd b/man/soda.Rd
index 7cf9bd4f..3523c9a8 100644
--- a/man/soda.Rd
+++ b/man/soda.Rd
@@ -15,7 +15,7 @@ A data frame with 60 observations on the following 2 variables.
 soda
 }
 \description{
-A randomly generated data set of soda preference (cola or orange) based on location.
+A randomly generated dataset of soda preference (cola or orange) based on location.
 }
 \examples{
 
diff --git a/man/student_sleep.Rd b/man/student_sleep.Rd
index 300e4538..62f243b8 100644
--- a/man/student_sleep.Rd
+++ b/man/student_sleep.Rd
@@ -17,7 +17,7 @@ Simulated data.
 student_sleep
 }
 \description{
-A simulated data set for how much 110 college students each slept in a
+A simulated dataset for how much 110 college students each slept in a
 single night.
 }
 \examples{
diff --git a/man/teacher.Rd b/man/teacher.Rd
index 72bcbcfe..6ea3450f 100644
--- a/man/teacher.Rd
+++ b/man/teacher.Rd
@@ -24,7 +24,7 @@ Originally posted on SODA Developers (dev.socrata.com/data), removed in 2020.
 teacher
 }
 \description{
-This data set contains teacher salaries from 2009-2010 for 71 teachers
+This dataset contains teacher salaries from 2009-2010 for 71 teachers
 employed by the St. Louis Public School in Michigan, as well as several
 covariates.
 }
diff --git a/man/tips.Rd b/man/tips.Rd
index 5e32e163..8128aa10 100644
--- a/man/tips.Rd
+++ b/man/tips.Rd
@@ -15,18 +15,18 @@ A data frame with 95 observations on the following 5 variables.
 }
 }
 \source{
-Simulated data set.
+Simulated dataset.
 }
 \usage{
 tips
 }
 \description{
-A simulated data set of tips over a few weeks on a couple days per week.
+A simulated dataset of tips over a few weeks on a couple of days per week.
 Each tip is associated with a single group, which may include several bills
 and tables (i.e. groups paid in one lump sum in simulations).
 }
 \details{
-This data set was built using simulations of tables, then bills, then tips
+This dataset was built using simulations of tables, then bills, then tips
 based on the bills. Large groups were assumed to only pay the gratuity,
 which is evident in the data. Tips were set to be plausible round values;
 they were often (but not always) rounded to dollars, quarters, etc.
diff --git a/man/toohey.Rd b/man/toohey.Rd
index cecb14ab..40b3af14 100644
--- a/man/toohey.Rd
+++ b/man/toohey.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{toohey}
 \alias{toohey}
-\title{Simulated polling data set}
+\title{Simulated polling dataset}
 \format{
 A data frame with 500 observations on the following variable.
 \describe{
diff --git a/man/toy_anova.Rd b/man/toy_anova.Rd
index 36d620a8..ad3468cd 100644
--- a/man/toy_anova.Rd
+++ b/man/toy_anova.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{toy_anova}
 \alias{toy_anova}
-\title{Simulated data set for ANOVA}
+\title{Simulated dataset for ANOVA}
 \format{
 A data frame with 70 observations on the following 3 variables.
 \describe{
@@ -15,7 +15,7 @@ A data frame with 70 observations on the following 3 variables.
 toy_anova
 }
 \description{
-Simulated data set for getting a better understanding of intuition that
+Simulated dataset for getting a better understanding of the intuition that
 ANOVA is based off of.
 }
 \examples{
diff --git a/man/ucla_textbooks_f18.Rd b/man/ucla_textbooks_f18.Rd
index 85432d78..a91e09d0 100644
--- a/man/ucla_textbooks_f18.Rd
+++ b/man/ucla_textbooks_f18.Rd
@@ -45,7 +45,7 @@ corresponding textbook prices were collected from the UCLA bookstore and
 also from Amazon.
 }
 \details{
-A past data set was collected from UCLA courses in Spring 2010, and Amazon
+A past dataset was collected from UCLA courses in Spring 2010, and Amazon
 at that time was found to be almost uniformly lower than those of the UCLA
 bookstore's.  Now in 2018, the UCLA bookstore is about even with Amazon on
 the vast majority of titles, and there is no statistical difference in the
diff --git a/man/ukdemo.Rd b/man/ukdemo.Rd
index 9ae54db0..853cc317 100644
--- a/man/ukdemo.Rd
+++ b/man/ukdemo.Rd
@@ -25,7 +25,7 @@ retrieved 2011-11-01.
 ukdemo
 }
 \description{
-This data set comes from the Guardian's Data Blog and includes five
+This dataset comes from the Guardian's Data Blog and includes five
 financial demographic variables.
 }
 \examples{
diff --git a/man/unempl.Rd b/man/unempl.Rd
index 59fe8db8..4ba049c7 100644
--- a/man/unempl.Rd
+++ b/man/unempl.Rd
@@ -43,7 +43,7 @@ during those years. The methods are described in detail by Coen and Romer.
 unempl
 }
 \description{
-A compilation of two data sets that provides an estimate of unemployment
+A compilation of two datasets that provides an estimate of unemployment
 from 1890 to 2010.
 }
 \examples{
diff --git a/man/write_pkg_data.Rd b/man/write_pkg_data.Rd
index cd13ad53..f2f724f7 100644
--- a/man/write_pkg_data.Rd
+++ b/man/write_pkg_data.Rd
@@ -29,7 +29,7 @@ the object (\code{"R"}).}
 \description{
 The function should be run with a path to a package directory.
 It will then look through the \code{data} directory of the package,
-and for all data sets that are data frames, create CSV variants
+and for all datasets that are data frames, create CSV variants
 in a \code{data-csv} directory.
 }
 \examples{
diff --git a/man/yrbss_samp.Rd b/man/yrbss_samp.Rd
index 52bef687..e2a44f20 100644
--- a/man/yrbss_samp.Rd
+++ b/man/yrbss_samp.Rd
@@ -35,7 +35,7 @@ get on a school night?}
 yrbss_samp
 }
 \description{
-A sample of the \code{\link{yrbss}} data set.
+A sample of the \code{\link{yrbss}} dataset.
 }
 \examples{