From 5b71391ca4b194aef5d4a668e1051b62adf941a1 Mon Sep 17 00:00:00 2001 From: Ted Laderas Date: Mon, 16 Sep 2024 14:42:45 -0700 Subject: [PATCH] rebasing is fun --- docs/chapter1.html | 76 +- docs/chapter2.html | 367 +- .../libs/bootstrap/bootstrap-icons.css | 2078 ++ .../libs/bootstrap/bootstrap-icons.woff | Bin 0 -> 176200 bytes .../libs/bootstrap/bootstrap.min.css | 12 + .../libs/bootstrap/bootstrap.min.js | 7 + .../libs/clipboard/clipboard.min.js | 7 + .../libs/quarto-html/anchor.min.js | 9 + .../libs/quarto-html/popper.min.js | 6 + .../quarto-syntax-highlighting.css | 203 + .../chapter2_files/libs/quarto-html/quarto.js | 899 + .../chapter2_files/libs/quarto-html/tippy.css | 1 + .../libs/quarto-html/tippy.umd.min.js | 2 + docs/chapter3.html | 471 +- .../libs/bootstrap/bootstrap-icons.css | 2078 ++ .../libs/bootstrap/bootstrap-icons.woff | Bin 0 -> 176200 bytes .../libs/bootstrap/bootstrap.min.css | 12 + .../libs/bootstrap/bootstrap.min.js | 7 + .../libs/clipboard/clipboard.min.js | 7 + .../live-runtime/live-runtime.css | 1 + .../live-runtime/live-runtime.js | 130 + .../live-runtime/pyodide-worker.js | 18 + .../libs/quarto-html/anchor.min.js | 9 + .../libs/quarto-html/popper.min.js | 6 + .../quarto-syntax-highlighting.css | 205 + .../chapter3_files/libs/quarto-html/quarto.js | 908 + .../chapter3_files/libs/quarto-html/tippy.css | 1 + .../libs/quarto-html/tippy.umd.min.js | 2 + .../libs/quarto-ojs/quarto-ojs-runtime.js | 24005 ++++++++++++++++ .../libs/quarto-ojs/quarto-ojs.css | 163 + docs/chapter3b.html | 314 +- docs/index.html | 29 + docs/search.json | 64 +- docs/sitemap.xml | 2 +- 34 files changed, 31604 insertions(+), 495 deletions(-) create mode 100644 docs/chapter2_files/libs/bootstrap/bootstrap-icons.css create mode 100644 docs/chapter2_files/libs/bootstrap/bootstrap-icons.woff create mode 100644 docs/chapter2_files/libs/bootstrap/bootstrap.min.css create mode 100644 docs/chapter2_files/libs/bootstrap/bootstrap.min.js create mode 100644 docs/chapter2_files/libs/clipboard/clipboard.min.js create mode 100644 docs/chapter2_files/libs/quarto-html/anchor.min.js create mode 100644 docs/chapter2_files/libs/quarto-html/popper.min.js create mode 100644 docs/chapter2_files/libs/quarto-html/quarto-syntax-highlighting.css create mode 100644 docs/chapter2_files/libs/quarto-html/quarto.js create mode 100644 docs/chapter2_files/libs/quarto-html/tippy.css create mode 100644 docs/chapter2_files/libs/quarto-html/tippy.umd.min.js create mode 100644 docs/chapter3_files/libs/bootstrap/bootstrap-icons.css create mode 100644 docs/chapter3_files/libs/bootstrap/bootstrap-icons.woff create mode 100644 docs/chapter3_files/libs/bootstrap/bootstrap.min.css create mode 100644 docs/chapter3_files/libs/bootstrap/bootstrap.min.js create mode 100644 docs/chapter3_files/libs/clipboard/clipboard.min.js create mode 100644 docs/chapter3_files/libs/quarto-contrib/live-runtime/live-runtime.css create mode 100644 docs/chapter3_files/libs/quarto-contrib/live-runtime/live-runtime.js create mode 100644 docs/chapter3_files/libs/quarto-contrib/live-runtime/pyodide-worker.js create mode 100644 docs/chapter3_files/libs/quarto-html/anchor.min.js create mode 100644 docs/chapter3_files/libs/quarto-html/popper.min.js create mode 100644 docs/chapter3_files/libs/quarto-html/quarto-syntax-highlighting.css create mode 100644 docs/chapter3_files/libs/quarto-html/quarto.js create mode 100644 docs/chapter3_files/libs/quarto-html/tippy.css create mode 100644 docs/chapter3_files/libs/quarto-html/tippy.umd.min.js create mode 100644 docs/chapter3_files/libs/quarto-ojs/quarto-ojs-runtime.js create mode 100644 docs/chapter3_files/libs/quarto-ojs/quarto-ojs.css diff --git a/docs/chapter1.html b/docs/chapter1.html index 7f5749d..51bdaa7 100644 --- a/docs/chapter1.html +++ b/docs/chapter1.html @@ -238,19 +238,19 @@

2 
@@ -259,7 +259,7 @@

2  -eyJjb2RlIjoib3B0aW9ucyh3YXJuID0gLTEsIHNob3cuZXJyb3IubWVzc2FnZXMgPSBGQUxTRSlcbmxpYnJhcnkoZHBseXIpXG5saWJyYXJ5KGdncGxvdDIpXG5saWJyYXJ5KGdhcG1pbmRlcilcbmdhcDE5OTIgPC0gZ2FwbWluZGVyICU+JSBmaWx0ZXIoeWVhciA9PSAxOTkyKSIsImF0dHIiOnsiZXZhbCI6dHJ1ZSwiZWNobyI6ZmFsc2UsImVkaXQiOmZhbHNlfX0= +eyJhdHRyIjp7ImVjaG8iOmZhbHNlLCJlZGl0IjpmYWxzZSwiZXZhbCI6dHJ1ZX0sImNvZGUiOiJvcHRpb25zKHdhcm4gPSAtMSwgc2hvdy5lcnJvci5tZXNzYWdlcyA9IEZBTFNFKVxubGlicmFyeShkcGx5cilcbmxpYnJhcnkoZ2dwbG90MilcbmxpYnJhcnkoZ2FwbWluZGVyKVxuZ2FwMTk5MiA8LSBnYXBtaW5kZXIgJT4lIGZpbHRlcih5ZWFyID09IDE5OTIpIn0=

@@ -287,7 +287,7 @@

-eyJjb2RlIjoiIyNydW4gaGVhZCBvbiBnYXAxOTkyXG5oZWFkKC0tLS0pXG4jI3J1biBjb2xuYW1lcyBoZXJlIG9uIGdhcDE5OTJcbmNvbG5hbWVzKC0tLS0pXG4jI3J1biBucm93KCkgb24gZ2FwMTk5MlxubnJvdygtLS0tLSkiLCJhdHRyIjp7ImV4ZXJjaXNlIjoiZXhfMSIsImV2YWwiOnRydWUsImVkaXQiOnRydWV9fQ== +eyJhdHRyIjp7ImV4ZXJjaXNlIjoiZXhfMSIsImVkaXQiOnRydWUsImV2YWwiOnRydWV9LCJjb2RlIjoiIyNydW4gaGVhZCBvbiBnYXAxOTkyXG5oZWFkKC0tLS0pXG4jI3J1biBjb2xuYW1lcyBoZXJlIG9uIGdhcDE5OTJcbmNvbG5hbWVzKC0tLS0pXG4jI3J1biBucm93KCkgb24gZ2FwMTk5MlxubnJvdygtLS0tLSkifQ== @@ -324,22 +324,22 @@

-
- + +
- +
- +
-
+
-
@@ -360,7 +360,7 @@

-eyJjb2RlIjoiZ2dwbG90KGRhdGEgPSBnYXAxOTkyLCBtYXBwaW5nID0gYWVzKHggPSBsb2coZ2RwUGVyY2FwKSwgeT1sb2cocG9wKSkpICtcbiAgZ2VvbV9wb2ludCgpIiwiYXR0ciI6eyJlZGl0IjpmYWxzZSwiZXZhbCI6dHJ1ZX19 +eyJhdHRyIjp7ImVkaXQiOmZhbHNlLCJldmFsIjp0cnVlfSwiY29kZSI6ImdncGxvdChkYXRhID0gZ2FwMTk5MiwgbWFwcGluZyA9IGFlcyh4ID0gbG9nKGdkcFBlcmNhcCksIHk9bG9nKHBvcCkpKSArXG4gIGdlb21fcG9pbnQoKSJ9

@@ -382,7 +382,7 @@

@@ -391,7 +391,7 @@

@@ -444,22 +444,22 @@

Which of the following is not a mappable aesthetic to geom_point()?

-
- - + + +
- +
- - + +
-
+
-
@@ -478,7 +478,7 @@

@@ -514,7 +514,7 @@

@@ -545,22 +545,22 @@

-
- + +
- +
- +
-
+
-
@@ -582,7 +582,7 @@

@@ -629,10 +629,10 @@

-eyJvcHRpb25zIjp7ImJhc2VVcmwiOiJodHRwczovL3dlYnIuci13YXNtLm9yZy92MC40LjEvIn0sInBhY2thZ2VzIjp7InBrZ3MiOlsiZXZhbHVhdGUiLCJrbml0ciIsImh0bWx0b29scyIsImdncGxvdDIiLCJnYXBtaW5kZXIiLCJjaGVja2Rvd24iLCJkcGx5ciJdLCJyZXBvcyI6W119LCJyZW5kZXJfZGYiOiJkZWZhdWx0In0= +eyJwYWNrYWdlcyI6eyJyZXBvcyI6W10sInBrZ3MiOlsiZXZhbHVhdGUiLCJrbml0ciIsImh0bWx0b29scyIsImdncGxvdDIiLCJnYXBtaW5kZXIiLCJjaGVja2Rvd24iLCJkcGx5ciJdfSwicmVuZGVyX2RmIjoiZGVmYXVsdCIsIm9wdGlvbnMiOnsiYmFzZVVybCI6Imh0dHBzOi8vd2Vici5yLXdhc20ub3JnL3YwLjQuMS8ifX0=
diff --git a/docs/chapter2.html b/docs/chapter2.html index 3aba5c2..4ba4485 100644 --- a/docs/chapter2.html +++ b/docs/chapter2.html @@ -76,6 +76,8 @@ + + + + @@ -244,35 +248,40 @@

3 

3.1 factor variables

-
#| setup: true
-#| echo: false
-#| exercise:
-#|   - ex_1
-#|   - ex_2
-#|   - ex_3
-#|   - ex_4
-#|   - ex_5
-#|   - ex_6
-#|   - ex_7
-#|   - ex_8
-library(ggplot2)
-library(dplyr)
-pets <- read.csv("https://raw.githubusercontent.com/laderast/RBootcamp/master/data/pets.csv")
-pets$ageCategory <- factor(pets$ageCategory, ordered = TRUE)
-pets$animal <- factor(pets$animal)
-pets$name <- factor(pets$name)
-pets$shotsCurrent <- factor(pets$shotsCurrent)
+ + + + + + + +
-
#| edit: false
-#| echo: false
-library(ggplot2)
-library(dplyr)
-pets <- read.csv("https://raw.githubusercontent.com/laderast/RBootcamp/master/data/pets.csv")
-pets$ageCategory <- factor(pets$ageCategory, ordered = TRUE)
-pets$animal <- factor(pets$animal)
-pets$name <- factor(pets$name)
-pets$shotsCurrent <- factor(pets$shotsCurrent)
+
+
+ +
+ +

Factors are how R represents categorical data.

There are two kinds of factors:

@@ -283,8 +292,14 @@

We’ll manipulate our barplots and add more information using factors.

Here’s the dataset we’ll use to investigate how to work with factors in ggplot2.

-
#| edit: false
-pets
+
+
+ +
+ +

3.1.1 Exercise

@@ -292,11 +307,17 @@

#| exercise: ex_1 -##use glimpse here -glimpse(----) -
-

Solution.

+
+
+
+ +
+ +
+
+
@@ -309,8 +330,8 @@

-
##use glimpse here
-glimpse(pets)
+
##use glimpse here
+glimpse(pets)

@@ -328,16 +349,16 @@

3.2.1 Exercise

-
#| exercise: ex_2
-##Show a barplot and count by name and fill by animal
-##theme() allows us to angle the text labels so that we can read them
-ggplot(pets, aes(x= -----)) + geom_bar() + 
-    ##We make the x axis text angled 
-    ##for better legibility
-    theme(axis.text.x = element_text(angle=45))
-
-
-

Solution.

+
+
+ +
+ +
+
+
@@ -350,12 +371,12 @@

-
##show a barplot and count by name and fill by animal
-##theme() allows us to angle the text labels so that we can read them
-ggplot(pets, aes(x=name)) + geom_bar() + 
-    ##we make the x axis text angled 
-    ##for better legibility
-    theme(axis.text.x = element_text(angle=45))
+
##show a barplot and count by name and fill by animal
+##theme() allows us to angle the text labels so that we can read them
+ggplot(pets, aes(x=name)) + geom_bar() + 
+    ##we make the x axis text angled 
+    ##for better legibility
+    theme(axis.text.x = element_text(angle=45))

@@ -368,12 +389,17 @@

Map shotsCurrent to the fill aesthetic.

3.3.1 Exercise

-
#| exercise: ex_3
-#map the right variable in pets to fill
-ggplot(pets, aes(x=animal, fill= ----)) + 
-  geom_bar()
-
-

Solution.

+
+
+
+ +
+ +
+
+
@@ -386,9 +412,9 @@

-
#map the right variable in pets to fill
-ggplot(pets, aes(x=animal, fill=shotsCurrent)) + 
-  geom_bar()
+
#map the right variable in pets to fill
+ggplot(pets, aes(x=animal, fill=shotsCurrent)) + 
+  geom_bar()

@@ -398,27 +424,27 @@

3.4 Quick Quiz

What does mapping color to "black" in geom_bar() do? For example:

-
ggplot(pets, aes(x=animal, fill=shotsCurrent)) + 
-  geom_bar(color="black")
+
ggplot(pets, aes(x=animal, fill=shotsCurrent)) + 
+  geom_bar(color="black")

If you’re unsure, compare the graph above to the previous graph.

-
- - -
- + +
- + + +
+
-
+
-
@@ -431,11 +457,17 @@

Change the position argument in geom_bar() to "fill". What percent of dogs did not receive shots?

3.5.1 Exercise

-
#| exercise: ex_4
-ggplot(pets, aes(x=animal,fill=shotsCurrent)) + 
-  geom_bar(position= ----, color="black")
-
-

Solution.

+
+
+
+ +
+ +
+
+
@@ -448,8 +480,8 @@

-
ggplot(pets, aes(x=animal,fill=shotsCurrent)) + 
-  geom_bar(position= "fill", color="black")
+
ggplot(pets, aes(x=animal,fill=shotsCurrent)) + 
+  geom_bar(position= "fill", color="black")

@@ -462,11 +494,17 @@

3.6.1 Exercise

Change the position argument in geom_bar() to "dodge".

-
#| exercise: ex_5
-ggplot(pets, aes(x=animal,fill=shotsCurrent)) + 
-      geom_bar(position= -----, color="black")
-
-

Solution.

+
+
+
+ +
+ +
+
+
@@ -479,8 +517,8 @@

-
ggplot(pets, aes(x=animal,fill=shotsCurrent)) + 
-      geom_bar(position= "dodge", color="black")
+
ggplot(pets, aes(x=animal,fill=shotsCurrent)) + 
+      geom_bar(position= "dodge", color="black")

@@ -491,25 +529,30 @@

3.7 Faceting a graph

Say you have another factor variable and you want to stratify the plots based on that. You can do that by supplying the name of that variable as a facet. Here, we facet our barplot by shotsCurrent.

-
#| edit: false
-ggplot(data=pets, mapping=aes(x=name)) + geom_bar() + 
-  ##have to specify facets using "~" notation
-  facet_wrap(facets=~shotsCurrent) + 
-  ##we make the x axis x angled for better legibility
-  theme(axis.text.x = element_text(angle=45))
+
+
+ +
+ +

You might notice that there are blank spots for the categories in each facet. We can remove these in each facet by using scale="free_x" argument in facet_wrap().

3.7.1 Exercise

Add free_x to the scale argument. How many animals named “Morris” did not receive shots?

-
#| exercise: ex_6
-ggplot(pets, aes(x=name)) + geom_bar() + 
-  facet_wrap(facets=~shotsCurrent, scale= ----) +
-  theme(axis.text.x = element_text(angle=45))
+
+
+ +
+
-
-

Solution.

+
+
@@ -522,9 +565,9 @@

-
ggplot(pets, aes(x=name)) + geom_bar() + 
-  facet_wrap(facets=~shotsCurrent, scale= "free_x") +
-  theme(axis.text.x = element_text(angle=45))
+
ggplot(pets, aes(x=name)) + geom_bar() + 
+  facet_wrap(facets=~shotsCurrent, scale= "free_x") +
+  theme(axis.text.x = element_text(angle=45))

@@ -535,25 +578,29 @@

3.8 Super Quick Review

Faceting a graph allows us to:

-
#| edit: false
-ggplot(pets, aes(x=name)) + geom_bar() + 
-  facet_wrap(facets=~shotsCurrent, scale="free_x") +
-  theme(axis.text.x = element_text(angle=45))
+
+
+ +
+ +
-
- + - -
+
-
@@ -566,14 +613,16 @@

Is the proportion of animals receiving shots the same across each age category?

Think about what to map to x, and what to map to fill, and what position argument you need for geom_bar(). Finally, think about how to facet the variable.

-
#| exercise: ex_7
-ggplot(pets, aes(x=ageCategory, fill= -----)) + 
-    #what argument goes here?
-    geom_bar(position = ---) +
-    facet_wrap(facets = ----, scale = ----)
-
-
-

Solution.

+
+
+ +
+ +
+
+
@@ -586,10 +635,10 @@

-
ggplot(pets, aes(x=ageCategory, fill=animal)) + 
-  #what argument goes here?
-  geom_bar(position = "fill") +
-  facet_wrap(facets=~shotsCurrent, scale = "free_x")
+
ggplot(pets, aes(x=ageCategory, fill=animal)) + 
+  #what argument goes here?
+  geom_bar(position = "fill") +
+  facet_wrap(facets=~shotsCurrent, scale = "free_x")

@@ -602,22 +651,22 @@

-
- + +
- +
- +
-
+
-
@@ -627,10 +676,15 @@

3.11 Exercise: Try out geom_boxplot() yourself

Plot a boxplot of weight conditioned on animal. Is there a difference in weight between animal types?

Think about what variables map to what aesthetics.

-
#| exercise: ex_8
-ggplot(pets, aes(x= -----, y= -----)) + geom_boxplot()
-
-

Solution.

+
+
+ +
+ +
+
@@ -643,7 +697,7 @@

-
ggplot(pets, aes(x= animal, y= weight)) + geom_boxplot()
+
ggplot(pets, aes(x= animal, y= weight)) + geom_boxplot()

@@ -657,10 +711,15 @@

3.12.1 Exercise

-
#| exercise: ex_8
-ggplot(pets, aes(x= -----, y= -----)) + geom_boxplot()
-
-

Solution.

+
+
+ +
+ +
+
@@ -673,7 +732,7 @@

-
ggplot(pets, aes(x= ageCategory, y= weight)) + geom_boxplot()
+
ggplot(pets, aes(x= ageCategory, y= weight)) + geom_boxplot()

@@ -699,9 +758,37 @@

+ + +
+
+ +
+
+ +
+
+

+ + + + + + @@ -243,27 +247,40 @@

4  -
#| setup: true
-#| echo: false
-#| exercise:
-#|   - ex_1
-#|   - ex_2
-#|   - ex_3
-#|   - ex_4
-#|   - ex_5
-#|   - ex_6
-#|   - ex_7
-#|   - ex_8
-library(dplyr)
-library(fivethirtyeight)
-data(biopics)
+ + + + + + + +

-
#| edit: false
-#| echo: false
-library(dplyr)
-library(fivethirtyeight)
-data(biopics)
+
+
+ +
+ +

We’ve been looking at datasets that fit the ggplot2 paradigm nicely; however, most data we encounter is really messy (missing values), or is a completely different format.

In this chapter, we’ll look at one of the most powerful tools in the tidyverse: dplyr, which lets you manipulate data frames.

@@ -280,12 +297,25 @@

4  dplyr cheat sheet

Also, remember: if you need to know the variables in a data.frame called biopics you can always use

-
#| edit: false
-colnames(biopics)
+
+
+ +
+ +

If you want more information on a function such as mutate(), you can always ask for help:

-
?mutate
+
+
+ +
+ +

Move on to the next exercise!

@@ -293,9 +323,9 @@

sumOfTwoNumbers <- 1 + 2

+
sumOfTwoNumbers <- 1 + 2

Once we have something assigned to a variable, we can use it in other expressions:

-
sumOfThreeNumbers <- sumOfTwoNumbers + 3
+
sumOfThreeNumbers <- sumOfTwoNumbers + 3

This is the bare basics of assignment. We’ll use it in the next exercises to evaluate the output of our dplyr cleaning.

4.1.1 Exercise

@@ -305,16 +335,16 @@

-
#| exercise: ex_1
-##assign newValue
-newValue <- ______
-## use newValue to calculate multValue
-multValue <- _______ * 5
-##show multValue
-multValue
-

-
-

Solution.

+
+
+ +
+ +
+
+
@@ -327,12 +357,12 @@

-
##assign newValue
-newValue <- 10
-## use newValue to calculate multValue
-multValue <- newValue * 5
-##show multValue
-multValue
+
##assign newValue
+newValue <- 10
+## use newValue to calculate multValue
+multValue <- newValue * 5
+##show multValue
+multValue

@@ -350,14 +380,16 @@

Use the levels() function to count the categories.
-
#| exercise: ex_2
-##run summary() here on biopics
-summary(-----)
-##show length of country categories here
-length(levels(biopics$------))
-
-
-

Solution.

+
+
+ +
+ +
+
+
@@ -370,10 +402,10 @@

-
##run summary here
-summary(biopics)
-##show length of country categories here
-length(levels(biopics$country))
+
##run summary here
+summary(biopics)
+##show length of country categories here
+length(levels(biopics$country))

@@ -385,12 +417,14 @@

filter() is a very useful dplyr command. It allows you to subset a data.frame based on variable criteria.

For example, if we wanted to subset biopics to those movies that were made in the UK we’d use the following statement:

-
#| edit: false
-#| echo: true
-#subset the data using filter
-biopicsUK <- filter(biopics, country=="UK")
-#confirm that we have subsetted correctly
-biopicsUK
+
+
+ +
+ +

Three things to note here:

    @@ -405,15 +439,16 @@

    Show how many rows are left using nrow(crimeMovies).

-
#| exercise: ex_3
-#add your filter statement here
-crimeMovies <- filter(------)
+
+
-#show number of crime movies -nrow(------)
-
-

Solution.

+ +
+

+
@@ -426,10 +461,10 @@

-
#add your filter statement here
-crimeMovies <- filter(biopics, type_of_subject == "Criminal")
-#show number of crime movies
-nrow(crimeMovies)
+
#add your filter statement here
+crimeMovies <- filter(biopics, type_of_subject == "Criminal")
+#show number of crime movies
+nrow(crimeMovies)

@@ -440,9 +475,14 @@

4.4 Comparison operators and chaining comparisons

Let’s look at the following filter() statement:

-
#| edit: false
-filter(biopics, year_release > 1980 & 
-    type_of_subject == "Criminal")
+
+
+ +
+ +

Three things to note:

    @@ -457,18 +497,16 @@

    Show how many rows are left from your filter() statement.

-
#| exercise: ex_4
-#add your comparison to the end of this filter statement
-crimeFilms <- filter(biopics, year_release > 1980 & 
-                     type_of_subject == "Criminal" &
-                     ------ == ------
-                     )
-    
-#show number of rows in crimeFilms
-nrow(------)
+
+
+ +
+
-
-

Solution.

+
+
@@ -482,14 +520,14 @@

-
#add your comparison to the end of this filter statement
-crimeFilms <- filter(biopics, year_release > 1980 & 
-                     type_of_subject == "Criminal" & 
-                     person_of_color == FALSE
-                     )
+
+
-#show number of rows in crimeFilms -nrow(crimeFilms)
+
+ +

@@ -501,25 +539,28 @@

4.5 Quick Quiz about Chaining Comparisons

Which statement should be the larger subset? Try them out in the console if you’re not sure.

-
nrow(filter(biopics, year_release > 1980 | type_of_subject == 'Criminal'))
+
+
-nrow(filter(biopics, year_release > 1980 & type_of_subject == 'Criminal'))
+
+ +
-
checkdown::check_question("filter(biopics, year_release > 1980 | type_of_subject == 'Criminal')", 
-                          options=c("filter(biopics, year_release > 1980 & type_of_subject == 'Criminal')", "filter(biopics, year_release > 1980 | type_of_subject == 'Criminal')"))
-
- -
+
-
@@ -529,8 +570,14 @@

4.6 The %in% operator

What if you wanted to select for multiple values? You can use the %in% operator. Here we put the values into a vector with the c() function, which concatentates the values together into a form that R can manipulate. Note that these values have to be exact and the case has to be the same (that is, “UK”, not “Uk” or “uk”) for the matching to work.

-
#| edit: false
-biopicsUSUK <- biopics %>% filter(country %in% c("US", "UK"))
+
+
+ +
+ +

4.6.1 Exercise

@@ -539,14 +586,16 @@

Assign the output to biopicsArt.
-
#| exercise: ex_5
-biopicsArt <- biopics %>%
-  filter(---- %in% ------)
+
+
-head(biopicsArt)
-
-

Solution.

+ +
+

+
@@ -559,10 +608,10 @@

-
biopicsArt <- biopics %>% 
-  filter(type_of_subject %in% c("Musician", "Artist", "Singer"))
-
-head(biopicsArt)
+
biopicsArt <- biopics %>% 
+  filter(type_of_subject %in% c("Musician", "Artist", "Singer"))
+
+head(biopicsArt)

@@ -573,9 +622,14 @@

4.7 Removing Missing Values

One trick you can use filter() for is to remove missing values. Usually missing values are coded as NA in data. You can remove rows that contain NAs by using is.na(). For example:

-
#| edit: false
-#| echo: true
-filter(biopics, !is.na(box_office))
+
+
+ +
+ +

Note the ! in front of is.na(box_office). This ! is known as the NOT operator. Basically, it switches the values in our is.na statement, making everything that was TRUE into FALSE, and everything FALSE into TRUE. We want to keep everything that is not NA, so that’s why we use the !.

@@ -586,15 +640,16 @@

How many missing values did we remove?
-
#| exercise: ex_6
-filteredBiopics <- filter(--------, -------)
-#show number of rows in biopics
-nrow(biopics)
-#show number of rows in filteredBiopics
-nrow(filteredBiopics)
-
-
-

Solution.

+
+
+ +
+ +
+
+
@@ -608,11 +663,14 @@

-
filteredBiopics <- filter(biopics, !is.na(box_office))
-#show number of rows in biopics
-nrow(biopics)
-#show number of rows in filteredBiopics
-nrow(filteredBiopics)
+
+
+ +
+ +

@@ -624,10 +682,14 @@

4.8 dplyr::mutate()

mutate() is one of the most useful dplyr commands. You can use it to transform data (variables in your data.frame) and add it as a new variable into the data.frame. For example, let’s calculate the total box_office divided by the number_of_subjects to normalize our comparison as normalized_box_office:

-
#| edit: false
-#| echo: true
-biopics2 <- mutate(biopics, normalized_box_office = box_office/number_of_subjects)
-biopics2
+
+
+ +
+ +

What did we do here? First, we used the mutate() function to add a new column into our data.frame called normalized_box_office. This new variable is calculated per row by dividing box_office by number_of_subjects.

@@ -638,15 +700,16 @@

Remember, you can use the paste() function to paste two strings together.

-
#| exercise: ex_7
-#assign new variable race_and_gender here using mutate()
-biopics2 <- mutate()
+
+
-#show first rows of biopics2 using head() -head(biopics2)
-
-

Solution.

+ +
+

+
@@ -659,10 +722,10 @@

-
#assign new variable race_and_gender here using mutate()
-biopics2 <- mutate(biopics, race_and_gender = paste(subject_race, subject_sex))
-#show first rows of biopics2 using head()
-head(biopics2)
+
#assign new variable race_and_gender here using mutate()
+biopics2 <- mutate(biopics, race_and_gender = paste(subject_race, subject_sex))
+#show first rows of biopics2 using head()
+head(biopics2)

@@ -673,10 +736,14 @@

4.9 You can use mutated variables right away!

The nifty thing about mutate() is that once you define the variables in the statement, you can use them right away, in the same mutate statement. For example, look at this code:

-
#| edit: false
-mutate(biopics, 
-    box_office_year = year_release * box_office, 
-    box_office_subject = paste0(box_office_year, subject))
+
+
+ +
+ +

Notice that we first defined box_office_year in the first part of the mutate() statement, and then used it right away to define a new variable, box_office_subject.

@@ -687,16 +754,16 @@

Hint: Add box_office_y_s_num=box_office_year/number_of_subjects to the statement below.
-
#| exercise: ex_8
-mutatedBiopics <- mutate(biopics, 
-                         box_office_year = year_release * box_office, 
-                         box_office_subject = paste0(box_office_year, subject),
-                         box_office_y_s_num= ------)
+
+
-mutatedBiopics
-
-

Solution.

+ +
+

+
@@ -709,12 +776,12 @@

-
mutatedBiopics <- mutate(biopics, 
-                         box_office_year = year_release * box_office, 
-                         box_office_subject = paste0(box_office_year, subject), 
-                         box_office_y_s_num = box_office_year/number_of_subjects)
-
-mutatedBiopics
+
mutatedBiopics <- mutate(biopics, 
+                         box_office_year = year_release * box_office, 
+                         box_office_subject = paste0(box_office_year, subject), 
+                         box_office_y_s_num = box_office_year/number_of_subjects)
+
+mutatedBiopics

@@ -725,24 +792,28 @@

4.10 Another Use for mutate()

What is this statement doing? Try it out in the console if you’re not sure.

-
mutate(biopics, subject= paste(subject, year_release))
+
+
+ +
+ +
-
checkdown::check_question("We are defining a brand-new variable with the same name in our dataset and keeping the old variable as well", options = c(
-  "We are defining a brand-new variable with the same name in our dataset and keeping the old variable as well", "We are processing the variable `subject` and saving it in place"
-))
-
- -
+
-
@@ -752,28 +823,38 @@

4.11 The difference between filter() and mutate()

What is the difference between these two statements? Try them out in the console if you’re not sure.

-
biopics %>% 
-    filter(year_release > 1998) %>% 
-    head()
+
+
+ +
+ +
-
biopics %>% 
-    mutate(isNewer = year_release > 1998) %>% 
-    head()
+
+
+ +
+ +
-
- -
+
-
@@ -787,9 +868,37 @@

+eyJyZW5kZXJfZGYiOiJkZWZhdWx0Iiwib3B0aW9ucyI6eyJiYXNlVXJsIjoiaHR0cHM6Ly93ZWJyLnItd2FzbS5vcmcvdjAuNC4xLyJ9LCJwYWNrYWdlcyI6eyJyZXBvcyI6W10sInBrZ3MiOlsiZXZhbHVhdGUiLCJrbml0ciIsImh0bWx0b29scyIsImdncGxvdDIiLCJjaGVja2Rvd24iLCJkcGx5ciIsImZpdmV0aGlydHllaWdodCJdfX0= + + +
+
+ +
+
+ +
+
+

+ + + + + + + @@ -182,9 +186,34 @@

1 Welcome to the

No browser needed!

+ +
+
+ +
+
+ +
+
+

+ +