diff --git a/docs/404.html b/docs/404.html
index edcc01b..ff4f6eb 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -39,7 +39,7 @@
 tidycensus
- 1.5
+ 1.6.1
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html
index bf1afcc..af3a11b 100644
--- a/docs/LICENSE-text.html
+++ b/docs/LICENSE-text.html
@@ -17,7 +17,7 @@
 tidycensus
- 1.5
+ 1.6.1
diff --git a/docs/articles/basic-usage.html b/docs/articles/basic-usage.html
index 86c16a2..317d494 100644
--- a/docs/articles/basic-usage.html
+++ b/docs/articles/basic-usage.html
@@ -40,7 +40,7 @@
 tidycensus
- 1.5
+ 1.6.1
@@ -117,10 +117,10 @@

Basic usage of tidycensus

load the package along with the tidyverse package, and set their Census API key. A key can be obtained from http://api.census.gov/data/key_signup.html.

-library(tidycensus)
-library(tidyverse)
-
-census_api_key("YOUR API KEY GOES HERE")
+library(tidycensus)
+library(tidyverse)
+
+census_api_key("YOUR API KEY GOES HERE")

There are two major functions implemented in tidycensus: get_decennial(), which grants access to the 2000, 2010, and 2020 decennial US Census APIs, and @@ -130,21 +130,21 @@

Basic usage of tidycensus

with data drawn from the Demographic and Housing Characteristics summary file:

-age20 <- get_decennial(geography = "state", 
-                       variables = "P13_001N", 
-                       year = 2020,
-                       sumfile = "dhc")
-
-head(age20)
-
## # A tibble: 6 × 4
-##   GEOID NAME                 variable value
-##   <chr> <chr>                <chr>    <dbl>
-## 1 09    Connecticut          P13_001N  41.1
-## 2 10    Delaware             P13_001N  41.1
-## 3 11    District of Columbia P13_001N  33.9
-## 4 12    Florida              P13_001N  43  
-## 5 13    Georgia              P13_001N  37.5
-## 6 15    Hawaii               P13_001N  40.8
+age20 <- get_decennial(geography = "state", 
+                       variables = "P13_001N", 
+                       year = 2020,
+                       sumfile = "dhc")
+
+head(age20)
+
## # A tibble: 6 × 4
+##   GEOID NAME                 variable value
+##   <chr> <chr>                <chr>    <dbl>
+## 1 09    Connecticut          P13_001N  41.1
+## 2 10    Delaware             P13_001N  41.1
+## 3 11    District of Columbia P13_001N  33.9
+## 4 12    Florida              P13_001N  43  
+## 5 13    Georgia              P13_001N  37.5
+## 6 15    Hawaii               P13_001N  40.8

The function returns a tibble with four columns by default: GEOID, which is an identifier for the geographical unit associated with the row; NAME, which is a descriptive name @@ -158,9 +158,9 @@

Basic usage of tidycensus

As the function has returned a tidy object, we can visualize it quickly with ggplot2:

-age20 %>%
-  ggplot(aes(x = value, y = reorder(NAME, value))) + 
-  geom_point()
+age20 %>%
+  ggplot(aes(x = value, y = reorder(NAME, value))) + 
+  geom_point()

Geography in tidycensus @@ -449,9 +449,9 @@

Searching for variables
cache = TRUE will cache the dataset on your computer for future use.

-v17 <- load_variables(2017, "acs5", cache = TRUE)
-
-View(v17)
+v17 <- load_variables(2017, "acs5", cache = TRUE)
+
+View(v17)

By filtering for “median age”, variable IDs corresponding to that query can be browsed interactively. For the 5-year ACS detailed tables @@ -480,29 +480,29 @@
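A quick way to do that filtering in code (a minimal sketch, assuming the v17 object created above; str_detect() comes from stringr, which loads with the tidyverse):

# browse variable IDs whose label mentions median age
v17 %>% 
  filter(str_detect(tolower(label), "median age"))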

Working with ACS data
-vt <- get_acs(geography = "county", 
-              variables = c(medincome = "B19013_001"), 
-              state = "VT", 
-              year = 2021)
-
-vt
-
## # A tibble: 14 × 5
-##    GEOID NAME                       variable  estimate   moe
-##    <chr> <chr>                      <chr>        <dbl> <dbl>
-##  1 50001 Addison County, Vermont    medincome    77978  3393
-##  2 50003 Bennington County, Vermont medincome    63448  3413
-##  3 50005 Caledonia County, Vermont  medincome    55159  3974
-##  4 50007 Chittenden County, Vermont medincome    81957  2521
-##  5 50009 Essex County, Vermont      medincome    48194  3577
-##  6 50011 Franklin County, Vermont   medincome    68476  3297
-##  7 50013 Grand Isle County, Vermont medincome    85154  7894
-##  8 50015 Lamoille County, Vermont   medincome    66016  4777
-##  9 50017 Orange County, Vermont     medincome    67906  2710
-## 10 50019 Orleans County, Vermont    medincome    58037  3153
-## 11 50021 Rutland County, Vermont    medincome    59751  2133
-## 12 50023 Washington County, Vermont medincome    70128  3014
-## 13 50025 Windham County, Vermont    medincome    59195  2060
-## 14 50027 Windsor County, Vermont    medincome    63787  2209
+vt <- get_acs(geography = "county", 
+              variables = c(medincome = "B19013_001"), 
+              state = "VT", 
+              year = 2021)
+
+vt
+
## # A tibble: 14 × 5
+##    GEOID NAME                       variable  estimate   moe
+##    <chr> <chr>                      <chr>        <dbl> <dbl>
+##  1 50001 Addison County, Vermont    medincome    77978  3393
+##  2 50003 Bennington County, Vermont medincome    63448  3413
+##  3 50005 Caledonia County, Vermont  medincome    55159  3974
+##  4 50007 Chittenden County, Vermont medincome    81957  2521
+##  5 50009 Essex County, Vermont      medincome    48194  3577
+##  6 50011 Franklin County, Vermont   medincome    68476  3297
+##  7 50013 Grand Isle County, Vermont medincome    85154  7894
+##  8 50015 Lamoille County, Vermont   medincome    66016  4777
+##  9 50017 Orange County, Vermont     medincome    67906  2710
+## 10 50019 Orleans County, Vermont    medincome    58037  3153
+## 11 50021 Rutland County, Vermont    medincome    59751  2133
+## 12 50023 Washington County, Vermont medincome    70128  3014
+## 13 50025 Windham County, Vermont    medincome    59195  2060
+## 14 50027 Windsor County, Vermont    medincome    63787  2209

The output is similar to a call to get_decennial(), but instead of a value column, get_acs returns estimate and moe columns for the ACS estimate @@ -513,15 +513,15 @@

Working with ACS data
As we have the margin of error, we can visualize the uncertainty around the estimate:

-vt %>%
-  mutate(NAME = gsub(" County, Vermont", "", NAME)) %>%
-  ggplot(aes(x = estimate, y = reorder(NAME, estimate))) +
-  geom_errorbarh(aes(xmin = estimate - moe, xmax = estimate + moe)) +
-  geom_point(color = "red", size = 3) +
-  labs(title = "Household income by county in Vermont",
-       subtitle = "2017-2021 American Community Survey",
-       y = "",
-       x = "ACS estimate (bars represent margin of error)")
+vt %>%
+  mutate(NAME = gsub(" County, Vermont", "", NAME)) %>%
+  ggplot(aes(x = estimate, y = reorder(NAME, estimate))) +
+  geom_errorbarh(aes(xmin = estimate - moe, xmax = estimate + moe)) +
+  geom_point(color = "red", size = 3) +
+  labs(title = "Household income by county in Vermont",
+       subtitle = "2017-2021 American Community Survey",
+       y = "",
+       x = "ACS estimate (bars represent margin of error)")

diff --git a/docs/articles/basic-usage_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/basic-usage_files/figure-html/unnamed-chunk-4-1.png
index 1e2b0e8..3af9d96 100644
Binary files a/docs/articles/basic-usage_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/basic-usage_files/figure-html/unnamed-chunk-4-1.png differ
diff --git a/docs/articles/basic-usage_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/basic-usage_files/figure-html/unnamed-chunk-7-1.png
index 70d9334..6b01d21 100644
Binary files a/docs/articles/basic-usage_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/basic-usage_files/figure-html/unnamed-chunk-7-1.png differ
diff --git a/docs/articles/index.html b/docs/articles/index.html
index d05ecbb..e9ae161 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -17,7 +17,7 @@
 tidycensus
- 1.5
+ 1.6.1
diff --git a/docs/articles/margins-of-error.html b/docs/articles/margins-of-error.html
index a191a42..378a005 100644
--- a/docs/articles/margins-of-error.html
+++ b/docs/articles/margins-of-error.html
@@ -40,7 +40,7 @@
 tidycensus
- 1.5
+ 1.6.1
@@ -134,27 +134,27 @@

Margins of error in the ACS

from the 2012-2016 ACS by Census tracts. We can pull up a dataset with get_acs:

-library(tidycensus)
-library(tidyverse)
-
-vars <- paste0("B01001_0", c(20:25, 44:49))
-
-ramsey <- get_acs(geography = "tract", 
-                  variables = vars, 
-                  state = "MN", 
-                  county = "Ramsey", 
-                  year = 2016)
-
-head(ramsey %>% select(-NAME))
-
## # A tibble: 6 × 4
-##   GEOID       variable   estimate   moe
-##   <chr>       <chr>         <dbl> <dbl>
-## 1 27123030100 B01001_020       51    27
-## 2 27123030100 B01001_021       92    40
-## 3 27123030100 B01001_022       48    28
-## 4 27123030100 B01001_023        8    13
-## 5 27123030100 B01001_024       51    52
-## 6 27123030100 B01001_025       23    18
+library(tidycensus)
+library(tidyverse)
+
+vars <- paste0("B01001_0", c(20:25, 44:49))
+
+ramsey <- get_acs(geography = "tract", 
+                  variables = vars, 
+                  state = "MN", 
+                  county = "Ramsey", 
+                  year = 2016)
+
+head(ramsey %>% select(-NAME))
+
## # A tibble: 6 × 4
+##   GEOID       variable   estimate   moe
+##   <chr>       <chr>         <dbl> <dbl>
+## 1 27123030100 B01001_020       51    27
+## 2 27123030100 B01001_021       92    40
+## 3 27123030100 B01001_022       48    28
+## 4 27123030100 B01001_023        8    13
+## 5 27123030100 B01001_024       51    52
+## 6 27123030100 B01001_025       23    18

We can see that in two instances the margin of error exceeds the estimate. One way to address this is through data aggregation. While the specific group estimates in this Census tract may be unreliable, the @@ -169,21 +169,21 @@
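Those cases are easy to flag in code; a minimal sketch using the ramsey object created above:

# rows where the margin of error is larger than the estimate itself
ramsey %>% 
  filter(moe > estimate) %>% 
  select(-NAME)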

Margins of error in the ACS

use of moe_sum to calculate the margin of error around a derived estimate for Census tract population over age 65.

-ramsey65 <- ramsey %>%
-  group_by(GEOID) %>%
-  summarize(sumest = sum(estimate), 
-            summoe = moe_sum(moe, estimate))
-
-head(ramsey65)
-
## # A tibble: 6 × 3
-##   GEOID       sumest summoe
-##   <chr>        <dbl>  <dbl>
-## 1 27123030100    677  124. 
-## 2 27123030201    899  201. 
-## 3 27123030202    149   53.8
-## 4 27123030300    783  154. 
-## 5 27123030400    423  131. 
-## 6 27123030500    396  111.
+ramsey65 <- ramsey %>%
+  group_by(GEOID) %>%
+  summarize(sumest = sum(estimate), 
+            summoe = moe_sum(moe, estimate))
+
+head(ramsey65)
+
## # A tibble: 6 × 3
+##   GEOID       sumest summoe
+##   <chr>        <dbl>  <dbl>
+## 1 27123030100    677  124. 
+## 2 27123030201    899  201. 
+## 3 27123030202    149   53.8
+## 4 27123030300    783  154. 
+## 5 27123030400    423  131. 
+## 6 27123030500    396  111.

The margins of error for this aggregate population are more reasonable. However, the Census Bureau does issue this warning:

diff --git a/docs/articles/other-datasets.html b/docs/articles/other-datasets.html
index cbe6c40..6493e7e 100644
--- a/docs/articles/other-datasets.html
+++ b/docs/articles/other-datasets.html
@@ -40,7 +40,7 @@
 tidycensus
- 1.5
+ 1.6.1
@@ -155,39 +155,39 @@

Components of change populati component. For example, we can request all components of change variables for US states in 2022:

-library(tidycensus)
-library(tidyverse)
-library(tigris)
-options(tigris_use_cache = TRUE)
-
-us_components <- get_estimates(geography = "state", product = "components", year = 2022)
-
-us_components
-
## # A tibble: 676 × 5
-##    GEOID NAME    variable          year    value
-##    <chr> <chr>   <chr>            <int>    <dbl>
-##  1 01    Alabama BIRTHS            2022 58280   
-##  2 01    Alabama DEATHS            2022 66870   
-##  3 01    Alabama NATURALCHG        2022 -8590   
-##  4 01    Alabama INTERNATIONALMIG  2022  4597   
-##  5 01    Alabama DOMESTICMIG       2022 28609   
-##  6 01    Alabama NETMIG            2022 33206   
-##  7 01    Alabama RESIDUAL          2022  -166   
-##  8 01    Alabama RBIRTH            2022    11.5 
-##  9 01    Alabama RDEATH            2022    13.2 
-## 10 01    Alabama RNATURALCHG       2022    -1.70
-## # ℹ 666 more rows
+library(tidycensus)
+library(tidyverse)
+library(tigris)
+options(tigris_use_cache = TRUE)
+
+us_components <- get_estimates(geography = "state", product = "components", year = 2022)
+
+us_components
+
## # A tibble: 676 × 5
+##    GEOID NAME    variable          year    value
+##    <chr> <chr>   <chr>            <int>    <dbl>
+##  1 01    Alabama BIRTHS            2022 58280   
+##  2 01    Alabama DEATHS            2022 66870   
+##  3 01    Alabama NATURALCHG        2022 -8590   
+##  4 01    Alabama INTERNATIONALMIG  2022  4597   
+##  5 01    Alabama DOMESTICMIG       2022 28609   
+##  6 01    Alabama NETMIG            2022 33206   
+##  7 01    Alabama RESIDUAL          2022  -166   
+##  8 01    Alabama RBIRTH            2022    11.5 
+##  9 01    Alabama RDEATH            2022    13.2 
+## 10 01    Alabama RNATURALCHG       2022    -1.70
+## # ℹ 666 more rows

The variables included in the components of change product consist of both estimates of counts and rates. Rates are preceded by an R in the variable name and are calculated per 1000 residents.

-unique(us_components$variable)
-
##  [1] "BIRTHS"            "DEATHS"            "NATURALCHG"       
-##  [4] "INTERNATIONALMIG"  "DOMESTICMIG"       "NETMIG"           
-##  [7] "RESIDUAL"          "RBIRTH"            "RDEATH"           
-## [10] "RNATURALCHG"       "RINTERNATIONALMIG" "RDOMESTICMIG"     
-## [13] "RNETMIG"
+unique(us_components$variable)
+
##  [1] "BIRTHS"            "DEATHS"            "NATURALCHG"       
+##  [4] "INTERNATIONALMIG"  "DOMESTICMIG"       "NETMIG"           
+##  [7] "RESIDUAL"          "RBIRTH"            "RDEATH"           
+## [10] "RNATURALCHG"       "RINTERNATIONALMIG" "RDOMESTICMIG"     
+## [13] "RNETMIG"

Available geographies include "us", "state", "county", "metropolitan statistical area/micropolitan statistical area" @@ -203,67 +203,67 @@
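Swapping one of those values into the geography argument is all that is required; a minimal sketch reusing the components product shown above:

# components of change for metro and micro areas rather than states
cbsa_components <- get_estimates(
  geography = "metropolitan statistical area/micropolitan statistical area",
  product = "components",
  year = 2022
)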

Components of change populati package to shift and rescale counties outside the continental US for national mapping.

-net_migration <- get_estimates(geography = "county",
-                               variables = "RNETMIG",
-                               year = 2022,
-                               geometry = TRUE,
-                               resolution = "20m") %>%
-  shift_geometry()
-
-net_migration
-
## Simple feature collection with 3144 features and 5 fields
-## Geometry type: GEOMETRY
-## Dimension:     XY
-## Bounding box:  xmin: -3112200 ymin: -1697728 xmax: 2258154 ymax: 1558935
-## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
-## # A tibble: 3,144 × 6
-##    GEOID NAME                    variable  year  value                  geometry
-##    <chr> <chr>                   <chr>    <int>  <dbl>        <MULTIPOLYGON [m]>
-##  1 17127 Massac County, Illinois RNETMIG   2022   2.01 (((620306.9 994.4699, 62…
-##  2 27017 Carlton County, Minnes… RNETMIG   2022   9.57 (((225299.4 1038545, 283…
-##  3 37181 Vance County, North Ca… RNETMIG   2022   4.13 (((1544259 32180.06, 154…
-##  4 47079 Henry County, Tennessee RNETMIG   2022  12.6  (((663474.2 -85746.62, 6…
-##  5 06021 Glenn County, Californ… RNETMIG   2022 -14.1  (((-2253309 578922.1, -2…
-##  6 17093 Kendall County, Illino… RNETMIG   2022  12.5  (((610436.7 496523.1, 63…
-##  7 19095 Iowa County, Iowa       RNETMIG   2022  -2.30 (((305057 494717.2, 3435…
-##  8 22003 Allen Parish, Louisiana RNETMIG   2022  -9.26 (((274501 -766852.4, 289…
-##  9 18055 Greene County, Indiana  RNETMIG   2022  10.8  (((748725.5 221905.9, 76…
-## 10 33001 Belknap County, New Ha… RNETMIG   2022   8.18 (((1931026 926777.5, 193…
-## # ℹ 3,134 more rows
+net_migration <- get_estimates(geography = "county",
+                               variables = "RNETMIG",
+                               year = 2022,
+                               geometry = TRUE,
+                               resolution = "20m") %>%
+  shift_geometry()
+
+net_migration
+
## Simple feature collection with 3144 features and 5 fields
+## Geometry type: GEOMETRY
+## Dimension:     XY
+## Bounding box:  xmin: -3112200 ymin: -1697728 xmax: 2258154 ymax: 1558935
+## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
+## # A tibble: 3,144 × 6
+##    GEOID NAME                    variable  year  value                  geometry
+##    <chr> <chr>                   <chr>    <int>  <dbl>        <MULTIPOLYGON [m]>
+##  1 17127 Massac County, Illinois RNETMIG   2022   2.01 (((620306.9 994.4699, 62…
+##  2 27017 Carlton County, Minnes… RNETMIG   2022   9.57 (((225299.4 1038545, 283…
+##  3 37181 Vance County, North Ca… RNETMIG   2022   4.13 (((1544259 32180.06, 154…
+##  4 47079 Henry County, Tennessee RNETMIG   2022  12.6  (((663474.2 -85746.62, 6…
+##  5 06021 Glenn County, Californ… RNETMIG   2022 -14.1  (((-2253309 578922.1, -2…
+##  6 17093 Kendall County, Illino… RNETMIG   2022  12.5  (((610436.7 496523.1, 63…
+##  7 19095 Iowa County, Iowa       RNETMIG   2022  -2.30 (((305057 494717.2, 3435…
+##  8 22003 Allen Parish, Louisiana RNETMIG   2022  -9.26 (((274501 -766852.4, 289…
+##  9 18055 Greene County, Indiana  RNETMIG   2022  10.8  (((748725.5 221905.9, 76…
+## 10 33001 Belknap County, New Ha… RNETMIG   2022   8.18 (((1931026 926777.5, 193…
+## # ℹ 3,134 more rows

We’ll next use tidyverse tools to generate a groups column that bins the net migration rates into comprehensible categories, and plot the result using geom_sf() and ggplot2.

-order = c("-15 and below", "-15 to -5", "-5 to +5", "+5 to +15", "+15 and up")
-
-net_migration <- net_migration %>%
-  mutate(groups = case_when(
-    value > 15 ~ "+15 and up",
-    value > 5 ~ "+5 to +15",
-    value > -5 ~ "-5 to +5",
-    value > -15 ~ "-15 to -5",
-    TRUE ~ "-15 and below"
-  )) %>%
-  mutate(groups = factor(groups, levels = order))
-
-state_overlay <- states(
-  cb = TRUE,
-  resolution = "20m"
-) %>%
-  filter(GEOID != "72") %>%
-  shift_geometry()
-
-ggplot() +
-  geom_sf(data = net_migration, aes(fill = groups, color = groups), size = 0.1) +
-  geom_sf(data = state_overlay, fill = NA, color = "black", size = 0.1) +
-  scale_fill_brewer(palette = "PuOr", direction = -1) +
-  scale_color_brewer(palette = "PuOr", direction = -1, guide = FALSE) +
-  coord_sf(datum = NA) +
-  theme_minimal(base_family = "Roboto") +
-  labs(title = "Net migration per 1000 residents by county",
-       subtitle = "US Census Bureau 2022 Population Estimates",
-       fill = "Rate",
-       caption = "Data acquired with the R tidycensus package | @kyle_e_walker")
+order = c("-15 and below", "-15 to -5", "-5 to +5", "+5 to +15", "+15 and up")
+
+net_migration <- net_migration %>%
+  mutate(groups = case_when(
+    value > 15 ~ "+15 and up",
+    value > 5 ~ "+5 to +15",
+    value > -5 ~ "-5 to +5",
+    value > -15 ~ "-15 to -5",
+    TRUE ~ "-15 and below"
+  )) %>%
+  mutate(groups = factor(groups, levels = order))
+
+state_overlay <- states(
+  cb = TRUE,
+  resolution = "20m"
+) %>%
+  filter(GEOID != "72") %>%
+  shift_geometry()
+
+ggplot() +
+  geom_sf(data = net_migration, aes(fill = groups, color = groups), size = 0.1) +
+  geom_sf(data = state_overlay, fill = NA, color = "black", size = 0.1) +
+  scale_fill_brewer(palette = "PuOr", direction = -1) +
+  scale_color_brewer(palette = "PuOr", direction = -1, guide = FALSE) +
+  coord_sf(datum = NA) +
+  theme_minimal(base_family = "Roboto") +
+  labs(title = "Net migration per 1000 residents by county",
+       subtitle = "US Census Bureau 2022 Population Estimates",
+       fill = "Rate",
+       caption = "Data acquired with the R tidycensus package | @kyle_e_walker")

@@ -282,53 +282,53 @@

Estimates of population characteristics
However, by specifying breakdown_labels = TRUE, the function will return the appropriate labels instead. For example:

-la_age_hisp <- get_estimates(geography = "county", 
-                             product = "characteristics", 
-                             breakdown = c("SEX", "AGEGROUP", "HISP"),  
-                             breakdown_labels = TRUE, 
-                             state = "CA", 
-                             county = "Los Angeles",
-                             year = 2022)
-
-la_age_hisp
-
## # A tibble: 114 × 7
-##    GEOID NAME                            year SEX   AGEGROUP        HISP   value
-##    <chr> <chr>                          <int> <chr> <fct>           <chr>  <dbl>
-##  1 06037 Los Angeles County, California  2022 Male  All ages        Both… 4.83e6
-##  2 06037 Los Angeles County, California  2022 Male  All ages        Non-… 2.44e6
-##  3 06037 Los Angeles County, California  2022 Male  All ages        Hisp… 2.39e6
-##  4 06037 Los Angeles County, California  2022 Male  Age 0 to 4 yea… Both… 2.49e5
-##  5 06037 Los Angeles County, California  2022 Male  Age 0 to 4 yea… Non-… 1.06e5
-##  6 06037 Los Angeles County, California  2022 Male  Age 0 to 4 yea… Hisp… 1.43e5
-##  7 06037 Los Angeles County, California  2022 Male  Age 5 to 9 yea… Both… 2.84e5
-##  8 06037 Los Angeles County, California  2022 Male  Age 5 to 9 yea… Non-… 1.20e5
-##  9 06037 Los Angeles County, California  2022 Male  Age 5 to 9 yea… Hisp… 1.63e5
-## 10 06037 Los Angeles County, California  2022 Male  Age 10 to 14 y… Both… 2.98e5
-## # ℹ 104 more rows
+la_age_hisp <- get_estimates(geography = "county", 
+                             product = "characteristics", 
+                             breakdown = c("SEX", "AGEGROUP", "HISP"),  
+                             breakdown_labels = TRUE, 
+                             state = "CA", 
+                             county = "Los Angeles",
+                             year = 2022)
+
+la_age_hisp

+
## # A tibble: 114 × 7
+##    GEOID NAME                            year SEX   AGEGROUP        HISP   value
+##    <chr> <chr>                          <int> <chr> <fct>           <chr>  <dbl>
+##  1 06037 Los Angeles County, California  2022 Male  All ages        Both… 4.83e6
+##  2 06037 Los Angeles County, California  2022 Male  All ages        Non-… 2.44e6
+##  3 06037 Los Angeles County, California  2022 Male  All ages        Hisp… 2.39e6
+##  4 06037 Los Angeles County, California  2022 Male  Age 0 to 4 yea… Both… 2.49e5
+##  5 06037 Los Angeles County, California  2022 Male  Age 0 to 4 yea… Non-… 1.06e5
+##  6 06037 Los Angeles County, California  2022 Male  Age 0 to 4 yea… Hisp… 1.43e5
+##  7 06037 Los Angeles County, California  2022 Male  Age 5 to 9 yea… Both… 2.84e5
+##  8 06037 Los Angeles County, California  2022 Male  Age 5 to 9 yea… Non-… 1.20e5
+##  9 06037 Los Angeles County, California  2022 Male  Age 5 to 9 yea… Hisp… 1.63e5
+## 10 06037 Los Angeles County, California  2022 Male  Age 10 to 14 y… Both… 2.98e5
+## # ℹ 104 more rows

With some additional data wrangling, the returned format facilitates analysis and visualization. For example, we can compare population pyramids for Hispanic and non-Hispanic populations in Los Angeles County:

-compare <- filter(la_age_hisp, str_detect(AGEGROUP, "^Age"), 
-                  HISP != "Both Hispanic Origins", 
-                  SEX != "Both sexes") %>%
-  mutate(value = ifelse(SEX == "Male", -value, value))
-
-ggplot(compare, aes(x = AGEGROUP, y = value, fill = SEX)) + 
-  geom_bar(stat = "identity", width = 1) + 
-  theme_minimal(base_family = "Roboto") + 
-  scale_y_continuous(labels = function(y) paste0(abs(y / 1000), "k")) + 
-  scale_x_discrete(labels = function(x) gsub("Age | years", "", x)) + 
-  scale_fill_manual(values = c("darkred", "navy")) + 
-  coord_flip() + 
-  facet_wrap(~HISP) + 
-  labs(x = "", 
-       y = "2022 Census Bureau population estimate", 
-       title = "Population structure by Hispanic origin", 
-       subtitle = "Los Angeles County, California", 
-       fill = "", 
-       caption = "Data source: US Census Bureau population estimates & tidycensus R package")
+compare <- filter(la_age_hisp, str_detect(AGEGROUP, "^Age"), 
+                  HISP != "Both Hispanic Origins", 
+                  SEX != "Both sexes") %>%
+  mutate(value = ifelse(SEX == "Male", -value, value))
+
+ggplot(compare, aes(x = AGEGROUP, y = value, fill = SEX)) + 
+  geom_bar(stat = "identity", width = 1) + 
+  theme_minimal(base_family = "Roboto") + 
+  scale_y_continuous(labels = function(y) paste0(abs(y / 1000), "k")) + 
+  scale_x_discrete(labels = function(x) gsub("Age | years", "", x)) + 
+  scale_fill_manual(values = c("darkred", "navy")) + 
+  coord_flip() + 
+  facet_wrap(~HISP) + 
+  labs(x = "", 
+       y = "2022 Census Bureau population estimate", 
+       title = "Population structure by Hispanic origin", 
+       subtitle = "Los Angeles County, California", 
+       fill = "", 
+       caption = "Data source: US Census Bureau population estimates & tidycensus R package")

@@ -368,25 +368,25 @@

Using get_flows()
Here we get county-to-county flow data for Westchester County, NY:

-wch_flows <- get_flows(
-  geography = "county",
-  state = "NY",
-  county = "Westchester",
-  year = 2018
-  )
-
-wch_flows %>% 
-  filter(!is.na(GEOID2)) %>% 
-  head()
-
## # A tibble: 6 × 7
-##   GEOID1 GEOID2 FULL1_NAME                   FULL2_NAME  variable estimate   moe
-##   <chr>  <chr>  <chr>                        <chr>       <chr>       <dbl> <dbl>
-## 1 36119  01089  Westchester County, New York Madison Co… MOVEDIN         0    28
-## 2 36119  01089  Westchester County, New York Madison Co… MOVEDOUT       26    41
-## 3 36119  01089  Westchester County, New York Madison Co… MOVEDNET      -26    41
-## 4 36119  01095  Westchester County, New York Marshall C… MOVEDIN         0    28
-## 5 36119  01095  Westchester County, New York Marshall C… MOVEDOUT       35    55
-## 6 36119  01095  Westchester County, New York Marshall C… MOVEDNET      -35    55
+wch_flows <- get_flows(
+  geography = "county",
+  state = "NY",
+  county = "Westchester",
+  year = 2018
+  )
+
+wch_flows %>% 
+  filter(!is.na(GEOID2)) %>% 
+  head()
+
## # A tibble: 6 × 7
+##   GEOID1 GEOID2 FULL1_NAME                   FULL2_NAME  variable estimate   moe
+##   <chr>  <chr>  <chr>                        <chr>       <chr>       <dbl> <dbl>
+## 1 36119  01089  Westchester County, New York Madison Co… MOVEDIN         0    28
+## 2 36119  01089  Westchester County, New York Madison Co… MOVEDOUT       26    41
+## 3 36119  01089  Westchester County, New York Madison Co… MOVEDNET      -26    41
+## 4 36119  01095  Westchester County, New York Marshall C… MOVEDIN         0    28
+## 5 36119  01095  Westchester County, New York Marshall C… MOVEDOUT       35    55
+## 6 36119  01095  Westchester County, New York Marshall C… MOVEDNET      -35    55

With the default setting of get_flows(), data is returned in a “tidy” or long format. Notice that for each pair of places, there are three rows returned with one row for each variable @@ -400,38 +400,38 @@
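If a wide layout with one row per origin-destination pair is easier to scan, the tidy output can be reshaped with tidyr; a minimal sketch using pivot_wider(), not a tidycensus-specific feature:

# one row per county pair, one column per flow variable
wch_flows %>% 
  filter(!is.na(GEOID2)) %>% 
  select(GEOID1, GEOID2, FULL2_NAME, variable, estimate) %>% 
  pivot_wider(names_from = variable, values_from = estimate)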

Using get_flows()
One simple question we can answer with this data is, to which county did the most people move from Westchester?

-wch_flows %>% 
-  filter(variable == "MOVEDOUT") %>% 
-  arrange(desc(estimate)) %>% 
-  head()
-
## # A tibble: 6 × 7
-##   GEOID1 GEOID2 FULL1_NAME                   FULL2_NAME  variable estimate   moe
-##   <chr>  <chr>  <chr>                        <chr>       <chr>       <dbl> <dbl>
-## 1 36119  09001  Westchester County, New York Fairfield … MOVEDOUT     3916   778
-## 2 36119  36061  Westchester County, New York New York C… MOVEDOUT     3328   596
-## 3 36119  36005  Westchester County, New York Bronx Coun… MOVEDOUT     2063   418
-## 4 36119  36027  Westchester County, New York Dutchess C… MOVEDOUT     1870   454
-## 5 36119  36079  Westchester County, New York Putnam Cou… MOVEDOUT     1318   324
-## 6 36119  36081  Westchester County, New York Queens Cou… MOVEDOUT     1082   240
+wch_flows %>% 
+  filter(variable == "MOVEDOUT") %>% 
+  arrange(desc(estimate)) %>% 
+  head()
+
## # A tibble: 6 × 7
+##   GEOID1 GEOID2 FULL1_NAME                   FULL2_NAME  variable estimate   moe
+##   <chr>  <chr>  <chr>                        <chr>       <chr>       <dbl> <dbl>
+## 1 36119  09001  Westchester County, New York Fairfield … MOVEDOUT     3916   778
+## 2 36119  36061  Westchester County, New York New York C… MOVEDOUT     3328   596
+## 3 36119  36005  Westchester County, New York Bronx Coun… MOVEDOUT     2063   418
+## 4 36119  36027  Westchester County, New York Dutchess C… MOVEDOUT     1870   454
+## 5 36119  36079  Westchester County, New York Putnam Cou… MOVEDOUT     1318   324
+## 6 36119  36081  Westchester County, New York Queens Cou… MOVEDOUT     1082   240

The MOVEDOUT variable only estimates the number of people that moved out of Westchester County and doesn’t account for the number of people that moved in to Westchester from each county. If you are interested in net migration (moved in - moved out), you can use the MOVEDNET variable.

-wch_flows %>% 
-  filter(variable == "MOVEDNET") %>% 
-  arrange(estimate) %>% 
-  head()
-
## # A tibble: 6 × 7
-##   GEOID1 GEOID2 FULL1_NAME                   FULL2_NAME  variable estimate   moe
-##   <chr>  <chr>  <chr>                        <chr>       <chr>       <dbl> <dbl>
-## 1 36119  09001  Westchester County, New York Fairfield … MOVEDNET    -1768   958
-## 2 36119  36027  Westchester County, New York Dutchess C… MOVEDNET    -1119   497
-## 3 36119  06037  Westchester County, New York Los Angele… MOVEDNET     -486   339
-## 4 36119  12099  Westchester County, New York Palm Beach… MOVEDNET     -450   182
-## 5 36119  25021  Westchester County, New York Norfolk Co… MOVEDNET     -358   351
-## 6 36119  36079  Westchester County, New York Putnam Cou… MOVEDNET     -340   407
+wch_flows %>% 
+  filter(variable == "MOVEDNET") %>% 
+  arrange(estimate) %>% 
+  head()
+
## # A tibble: 6 × 7
+##   GEOID1 GEOID2 FULL1_NAME                   FULL2_NAME  variable estimate   moe
+##   <chr>  <chr>  <chr>                        <chr>       <chr>       <dbl> <dbl>
+## 1 36119  09001  Westchester County, New York Fairfield … MOVEDNET    -1768   958
+## 2 36119  36027  Westchester County, New York Dutchess C… MOVEDNET    -1119   497
+## 3 36119  06037  Westchester County, New York Los Angele… MOVEDNET     -486   339
+## 4 36119  12099  Westchester County, New York Palm Beach… MOVEDNET     -450   182
+## 5 36119  25021  Westchester County, New York Norfolk Co… MOVEDNET     -358   351
+## 6 36119  36079  Westchester County, New York Putnam Cou… MOVEDNET     -340   407

You may have noticed that there are some destination geographies that are not other counties. For people that moved into Westchester from outside the United States, the Migration Flows data reports the region @@ -442,18 +442,18 @@

Using get_flows()
MOVEDNET is NA. The GEOID of non-US places is also NA.

-
## # A tibble: 6 × 7
-##   GEOID1 GEOID2 FULL1_NAME                   FULL2_NAME variable estimate   moe
-##   <chr>  <chr>  <chr>                        <chr>      <chr>       <dbl> <dbl>
-## 1 36119  NA     Westchester County, New York Africa     MOVEDIN       419   411
-## 2 36119  NA     Westchester County, New York Africa     MOVEDOUT       NA    NA
-## 3 36119  NA     Westchester County, New York Africa     MOVEDNET       NA    NA
-## 4 36119  NA     Westchester County, New York Asia       MOVEDIN      2267   436
-## 5 36119  NA     Westchester County, New York Asia       MOVEDOUT       NA    NA
-## 6 36119  NA     Westchester County, New York Asia       MOVEDNET       NA    NA
+wch_flows %>% 
+  filter(is.na(GEOID2)) %>% 
+  head()
+
## # A tibble: 6 × 7
+##   GEOID1 GEOID2 FULL1_NAME                   FULL2_NAME variable estimate   moe
+##   <chr>  <chr>  <chr>                        <chr>      <chr>       <dbl> <dbl>
+## 1 36119  NA     Westchester County, New York Africa     MOVEDIN       419   411
+## 2 36119  NA     Westchester County, New York Africa     MOVEDOUT       NA    NA
+## 3 36119  NA     Westchester County, New York Africa     MOVEDNET       NA    NA
+## 4 36119  NA     Westchester County, New York Asia       MOVEDIN      2267   436
+## 5 36119  NA     Westchester County, New York Asia       MOVEDOUT       NA    NA
+## 6 36119  NA     Westchester County, New York Asia       MOVEDNET       NA    NA

Demographic characteristics @@ -463,25 +463,25 @@

Demographic characteristics
-la_flows <- get_flows(
-  geography = "metropolitan statistical area",
-  breakdown = "RACE",
-  breakdown_labels = TRUE,
-  msa = 31080,   # los angeles msa fips code
-  year = 2015
-  )
-
-# net migration between la and san francisco
-la_flows %>% 
-  filter(str_detect(FULL2_NAME, "San Fran"), variable == "MOVEDNET")

-
## # A tibble: 5 × 9
-##   GEOID1 GEOID2 FULL1_NAME   FULL2_NAME RACE  RACE_label variable estimate   moe
-##   <chr>  <chr>  <chr>        <chr>      <chr> <chr>      <chr>       <dbl> <dbl>
-## 1 31080  41860  Los Angeles… San Franc… 00    All races  MOVEDNET    -2433  1585
-## 2 31080  41860  Los Angeles… San Franc… 01    White alo… MOVEDNET    -1077  1096
-## 3 31080  41860  Los Angeles… San Franc… 02    Black or … MOVEDNET       98   378
-## 4 31080  41860  Los Angeles… San Franc… 03    Asian alo… MOVEDNET     -580   778
-## 5 31080  41860  Los Angeles… San Franc… 04    Other rac… MOVEDNET     -874   549
+la_flows <- get_flows(
+  geography = "metropolitan statistical area",
+  breakdown = "RACE",
+  breakdown_labels = TRUE,
+  msa = 31080,   # los angeles msa fips code
+  year = 2015
+  )
+
+# net migration between la and san francisco
+la_flows %>% 
+  filter(str_detect(FULL2_NAME, "San Fran"), variable == "MOVEDNET")
+
## # A tibble: 5 × 9
+##   GEOID1 GEOID2 FULL1_NAME   FULL2_NAME RACE  RACE_label variable estimate   moe
+##   <chr>  <chr>  <chr>        <chr>      <chr> <chr>      <chr>       <dbl> <dbl>
+## 1 31080  41860  Los Angeles… San Franc… 00    All races  MOVEDNET    -2433  1585
+## 2 31080  41860  Los Angeles… San Franc… 01    White alo… MOVEDNET    -1077  1096
+## 3 31080  41860  Los Angeles… San Franc… 02    Black or … MOVEDNET       98   378
+## 4 31080  41860  Los Angeles… San Franc… 03    Asian alo… MOVEDNET     -580   778
+## 5 31080  41860  Los Angeles… San Franc… 04    Other rac… MOVEDNET     -874   549

Note that the demographic characteristics must be specified in the breakdown argument of get_flows() (not the variable argument). For each dataset there are three or @@ -505,31 +505,31 @@

Mapping migration flows
centroid2 column.

-phx_flows <- get_flows(
-  geography = "metropolitan statistical area",
-  msa = 38060,
-  year = 2018,
-  geometry = TRUE
-  )
-
-phx_flows %>% 
-  head()
-
## Simple feature collection with 6 features and 7 fields
-## Active geometry column: centroid1
-## Geometry type: POINT
-## Dimension:     XY
-## Bounding box:  xmin: -112.0705 ymin: 33.18571 xmax: -112.0705 ymax: 33.18571
-## Geodetic CRS:  NAD83
-## # A tibble: 6 × 9
-##   GEOID1 GEOID2 FULL1_NAME                    FULL2_NAME variable estimate   moe
-##   <chr>  <chr>  <chr>                         <chr>      <chr>       <dbl> <dbl>
-## 1 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDIN     21602  1464
-## 2 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDOUT    21192  1559
-## 3 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDNET      410  2186
-## 4 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Africa     MOVEDIN      1078   385
-## 5 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Africa     MOVEDOUT       NA    NA
-## 6 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Africa     MOVEDNET       NA    NA
-## # ℹ 2 more variables: centroid1 <POINT [°]>, centroid2 <POINT [°]>
+phx_flows <- get_flows(
+  geography = "metropolitan statistical area",
+  msa = 38060,
+  year = 2018,
+  geometry = TRUE
+  )
+
+phx_flows %>% 
+  head()
+
## Simple feature collection with 6 features and 7 fields
+## Active geometry column: centroid1
+## Geometry type: POINT
+## Dimension:     XY
+## Bounding box:  xmin: -112.0705 ymin: 33.18571 xmax: -112.0705 ymax: 33.18571
+## Geodetic CRS:  NAD83
+## # A tibble: 6 × 9
+##   GEOID1 GEOID2 FULL1_NAME                    FULL2_NAME variable estimate   moe
+##   <chr>  <chr>  <chr>                         <chr>      <chr>       <dbl> <dbl>
+## 1 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDIN     21602  1464
+## 2 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDOUT    21192  1559
+## 3 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Outside M… MOVEDNET      410  2186
+## 4 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Africa     MOVEDIN      1078   385
+## 5 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Africa     MOVEDOUT       NA    NA
+## 6 38060  NA     Phoenix-Mesa-Scottsdale, AZ … Africa     MOVEDNET       NA    NA
+## # ℹ 2 more variables: centroid1 <POINT [°]>, centroid2 <POINT [°]>

With the centroids attached to each pair of places, it is straightforward to map the migration flows. Here, we look at the most common origin MSAs for people moving to Phoenix-Mesa-Scottsdale, AZ. To @@ -537,30 +537,30 @@

Mapping migration flows
To use mapdeck, you’ll need a Mapbox account and access token.

-library(mapdeck)
-
-top_move_in <- phx_flows %>% 
-  filter(!is.na(GEOID2), variable == "MOVEDIN") %>% 
-  slice_max(n = 25, order_by = estimate) %>% 
-  mutate(
-    width = estimate / 500,
-    tooltip = paste0(
-      scales::comma(estimate * 5, 1),
-      " people moved from ", str_remove(FULL2_NAME, "Metro Area"),
-      " to ", str_remove(FULL1_NAME, "Metro Area"), " between 2014 and 2018"
-      )
-    )
-
-top_move_in %>% 
-  mapdeck(style = mapdeck_style("dark"), pitch = 45) %>% 
-  add_arc(
-    origin = "centroid1",
-    destination = "centroid2",
-    stroke_width = "width",
-    auto_highlight = TRUE,
-    highlight_colour = "#8c43facc",
-    tooltip = "tooltip"
-  )
+library(mapdeck)
+
+top_move_in <- phx_flows %>% 
+  filter(!is.na(GEOID2), variable == "MOVEDIN") %>% 
+  slice_max(n = 25, order_by = estimate) %>% 
+  mutate(
+    width = estimate / 500,
+    tooltip = paste0(
+      scales::comma(estimate * 5, 1),
+      " people moved from ", str_remove(FULL2_NAME, "Metro Area"),
+      " to ", str_remove(FULL1_NAME, "Metro Area"), " between 2014 and 2018"
+      )
+    )
+
+top_move_in %>% 
+  mapdeck(style = mapdeck_style("dark"), pitch = 45) %>% 
+  add_arc(
+    origin = "centroid1",
+    destination = "centroid2",
+    stroke_width = "width",
+    auto_highlight = TRUE,
+    highlight_colour = "#8c43facc",
+    tooltip = "tooltip"
+  )

Migration Flow map showing people moving to Phoenix with arcs from the top 25 Metro areas whose width represents the number of people moving

diff --git a/docs/articles/other-datasets_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/other-datasets_files/figure-html/unnamed-chunk-4-1.png
index d3613b1..01e462d 100644
Binary files a/docs/articles/other-datasets_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/other-datasets_files/figure-html/unnamed-chunk-4-1.png differ
diff --git a/docs/articles/other-datasets_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/other-datasets_files/figure-html/unnamed-chunk-6-1.png
index 9913c3e..b827da0 100644
Binary files a/docs/articles/other-datasets_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/other-datasets_files/figure-html/unnamed-chunk-6-1.png differ
diff --git a/docs/articles/pums-data.html b/docs/articles/pums-data.html
index a84625c..f1270e0 100644
--- a/docs/articles/pums-data.html
+++ b/docs/articles/pums-data.html
@@ -40,7 +40,7 @@
 tidycensus
- 1.5
+ 1.6.1
@@ -172,27 +172,27 @@

PUMS data dictionaries
-install.packages(c("survey", "srvyr"))
+install.packages(c("survey", "srvyr"))
-library(tidyverse)
-library(tidycensus)
-
-pums_vars_2018 <- pums_variables %>% 
-  filter(year == 2018, survey == "acs1")
+library(tidyverse)
+library(tidycensus)
+
+pums_vars_2018 <- pums_variables %>% 
+  filter(year == 2018, survey == "acs1")

pums_variables contains both the variables as well as their possible values. So let’s just look at the unique variables.

-pums_vars_2018 %>% 
-  distinct(var_code, var_label, data_type, level)
-
## # A tibble: 513 × 4
-##   var_code var_label                                                                      data_type level
-##   <chr>    <chr>                                                                          <chr>     <chr>
-## 1 SERIALNO Housing unit/GQ person serial number                                           chr       NA   
-## 2 DIVISION Division code based on 2010 Census definitions                                 chr       NA   
-## 3 PUMA     Public use microdata area code (PUMA) based on 2010 Census definition (areas … chr       NA   
-## 4 REGION   Region code based on 2010 Census definitions                                   chr       NA   
-## 5 ST       State Code based on 2010 Census definitions                                    chr       NA   
-## # ℹ 508 more rows
+pums_vars_2018 %>% 
+  distinct(var_code, var_label, data_type, level)
+
## # A tibble: 513 × 4
+##   var_code var_label                                                                      data_type level
+##   <chr>    <chr>                                                                          <chr>     <chr>
+## 1 SERIALNO Housing unit/GQ person serial number                                           chr       NA   
+## 2 DIVISION Division code based on 2010 Census definitions                                 chr       NA   
+## 3 PUMA     Public use microdata area code (PUMA) based on 2010 Census definition (areas … chr       NA   
+## 4 REGION   Region code based on 2010 Census definitions                                   chr       NA   
+## 5 ST       State Code based on 2010 Census definitions                                    chr       NA   
+## # ℹ 508 more rows

If you’re new to PUMS data, this is a good dataset to browse to get a feel for what variables are available.

@@ -212,18 +212,18 @@

Person vs. housing unit
-pums_vars_2018 %>% 
-  distinct(var_code, var_label, data_type, level) %>% 
-  filter(level == "person")
-
## # A tibble: 279 × 4
-##   var_code var_label                       data_type level 
-##   <chr>    <chr>                           <chr>     <chr> 
-## 1 SPORDER  Person number                   num       person
-## 2 PWGTP    Person's weight                 num       person
-## 3 AGEP     Age                             num       person
-## 4 CIT      Citizenship status              chr       person
-## 5 CITWP    Year of naturalization write-in num       person
-## # ℹ 274 more rows
+pums_vars_2018 %>% 
+  distinct(var_code, var_label, data_type, level) %>% 
+  filter(level == "person")
+
## # A tibble: 279 × 4
+##   var_code var_label                       data_type level 
+##   <chr>    <chr>                           <chr>     <chr> 
+## 1 SPORDER  Person number                   num       person
+## 2 PWGTP    Person's weight                 num       person
+## 3 AGEP     Age                             num       person
+## 4 CIT      Citizenship status              chr       person
+## 5 CITWP    Year of naturalization write-in num       person
+## # ℹ 274 more rows

It is important to be mindful of whether the variables you choose to analyze are person- or household-level variables.
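The same lookup works for household-level variables; a minimal sketch, assuming housing-unit records are coded as "housing" in the level column of pums_variables:

# household-level (housing unit) variables only
pums_vars_2018 %>% 
  distinct(var_code, var_label, data_type, level) %>% 
  filter(level == "housing")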

@@ -238,23 +238,23 @@

Using get_pums() to d get PUMA, SEX, AGEP, and SCHL variables for Vermont from the 2018 1-year ACS.

-vt_pums <- get_pums(
-  variables = c("PUMA", "SEX", "AGEP", "SCHL"),
-  state = "VT",
-  survey = "acs1",
-  year = 2018
-  )
+vt_pums <- get_pums(
+  variables = c("PUMA", "SEX", "AGEP", "SCHL"),
+  state = "VT",
+  survey = "acs1",
+  year = 2018
+  )
-vt_pums
-
## # A tibble: 6,436 × 9
-##   SERIALNO      SPORDER  WGTP PWGTP  AGEP PUMA  ST    SCHL  SEX  
-##   <chr>           <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
-## 1 2018GQ0000859       1     0    61    19 00200 50    19    1    
-## 2 2018GQ0001119       1     0    67    80 00200 50    11    2    
-## 3 2018GQ0001888       1     0   177    82 00400 50    16    2    
-## 4 2018GQ0002438       1     0    17    17 00100 50    16    2    
-## 5 2018GQ0003293       1     0    68    20 00400 50    19    2    
-## # ℹ 6,431 more rows
+vt_pums
+
## # A tibble: 6,436 × 9
+##   SERIALNO      SPORDER  WGTP PWGTP  AGEP PUMA  ST    SCHL  SEX  
+##   <chr>           <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
+## 1 2018GQ0002083       1     0    59    20 00100 50    19    2    
+## 2 2018GQ0002131       1     0     2    51 00400 50    16    1    
+## 3 2018GQ0002587       1     0    52    18 00400 50    16    1    
+## 4 2018GQ0003751       1     0    34    19 00100 50    19    2    
+## 5 2018GQ0003853       1     0   129    20 00400 50    19    1    
+## # ℹ 6,431 more rows

We get 6436 rows and 9 columns. In addition to the variables we specified, get_pums() also always returns SERIALNO, SPORDER, WGTP, @@ -269,24 +269,24 @@

Using get_pums() to d recode = TRUE in get_pums() to return additional columns with the values of these variables recoded.

-vt_pums_recoded <- get_pums(
-  variables = c("PUMA", "SEX", "AGEP", "SCHL"),
-  state = "VT",
-  survey = "acs1",
-  year = 2018,
-  recode = TRUE
-  )
+vt_pums_recoded <- get_pums(
+  variables = c("PUMA", "SEX", "AGEP", "SCHL"),
+  state = "VT",
+  survey = "acs1",
+  year = 2018,
+  recode = TRUE
+  )
-vt_pums_recoded
-
## # A tibble: 6,436 × 12
-##   SERIALNO      SPORDER  WGTP PWGTP  AGEP PUMA  ST    SCHL  SEX   ST_label   SCHL_label         SEX_label
-##   <chr>           <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <ord>      <ord>              <ord>    
-## 1 2018GQ0000859       1     0    61    19 00200 50    19    1     Vermont/VT 1 or more years o… Male     
-## 2 2018GQ0001119       1     0    67    80 00200 50    11    2     Vermont/VT Grade 8            Female   
-## 3 2018GQ0001888       1     0   177    82 00400 50    16    2     Vermont/VT Regular high scho… Female   
-## 4 2018GQ0002438       1     0    17    17 00100 50    16    2     Vermont/VT Regular high scho… Female   
-## 5 2018GQ0003293       1     0    68    20 00400 50    19    2     Vermont/VT 1 or more years o… Female   
-## # ℹ 6,431 more rows
+vt_pums_recoded
+
## # A tibble: 6,436 × 12
+##   SERIALNO      SPORDER  WGTP PWGTP  AGEP PUMA  ST    SCHL  SEX   ST_label   SCHL_label         SEX_label
+##   <chr>           <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <ord>      <ord>              <ord>    
+## 1 2018GQ0001230       1     0     3    26 00300 50    20    1     Vermont/VT Associate's degree Male     
+## 2 2018GQ0002135       1     0    64    18 00100 50    19    2     Vermont/VT 1 or more years o… Female   
+## 3 2018GQ0002999       1     0    62    20 00400 50    19    2     Vermont/VT 1 or more years o… Female   
+## 4 2018GQ0004077       1     0    16    94 00200 50    21    2     Vermont/VT Bachelor's degree  Female   
+## 5 2018GQ0006486       1     0    26    20 00400 50    19    1     Vermont/VT 1 or more years o… Male     
+## # ℹ 6,431 more rows

Analyzing PUMS data @@ -300,26 +300,26 @@

Analyzing PUMS data
-sum(vt_pums_recoded$PWGTP)

-
## [1] 626299
+sum(vt_pums_recoded$PWGTP)
+
## [1] 626299

Another convenient approach to weighting PUMS data is to use the wt argument in dplyr::count(). Here, we calculate the population by sex for each PUMA in Vermont (there are only four in the whole state!).

-vt_pums_recoded %>% 
-  count(PUMA, SEX_label, wt = PWGTP)
-
## # A tibble: 8 × 3
-##   PUMA  SEX_label      n
-##   <chr> <ord>      <dbl>
-## 1 00100 Male      108732
-## 2 00100 Female    111564
-## 3 00200 Male       73768
-## 4 00200 Female     73416
-## 5 00300 Male       61601
-## 6 00300 Female     64806
-## 7 00400 Male       65675
-## 8 00400 Female     66737
+vt_pums_recoded %>% 
+  count(PUMA, SEX_label, wt = PWGTP)
+
## # A tibble: 8 × 3
+##   PUMA  SEX_label      n
+##   <chr> <ord>      <dbl>
+## 1 00100 Male      108732
+## 2 00100 Female    111564
+## 3 00200 Male       73768
+## 4 00200 Female     73416
+## 5 00300 Male       61601
+## 6 00300 Female     64806
+## 7 00400 Male       65675
+## 8 00400 Female     66737

Many of the variables included in the PUMS data are categorical and we might want to group some categories together and estimate the proportion of the population with these characteristics. In this @@ -328,27 +328,27 @@

Analyzing PUMS data
-vt_pums_recoded %>% 
-  mutate(ba_above = SCHL %in% c("21", "22", "23", "24")) %>% 
-  group_by(PUMA, SEX_label) %>% 
-  summarize(
-    total_pop = sum(PWGTP),
-    mean_age = weighted.mean(AGEP, PWGTP),
-    ba_above = sum(PWGTP[ba_above == TRUE & AGEP >= 25]),
-    ba_above_pct = ba_above / sum(PWGTP[AGEP >= 25])
-  )
-
## # A tibble: 8 × 6
-## # Groups:   PUMA [4]
-##   PUMA  SEX_label total_pop mean_age ba_above ba_above_pct
-##   <chr> <ord>         <dbl>    <dbl>    <dbl>        <dbl>
-## 1 00100 Male         108732     38.2    34113        0.469
-## 2 00100 Female       111564     40.4    36873        0.473
-## 3 00200 Male          73768     41.4    15831        0.303
-## 4 00200 Female        73416     43.6    20248        0.367
-## 5 00300 Male          61601     43.7    14869        0.326
-## 6 00300 Female        64806     45.0    21527        0.434
-## 7 00400 Male          65675     41.9    12788        0.278
-## 8 00400 Female        66737     44.6    18980        0.391
+vt_pums_recoded %>% 
+  mutate(ba_above = SCHL %in% c("21", "22", "23", "24")) %>% 
+  group_by(PUMA, SEX_label) %>% 
+  summarize(
+    total_pop = sum(PWGTP),
+    mean_age = weighted.mean(AGEP, PWGTP),
+    ba_above = sum(PWGTP[ba_above == TRUE & AGEP >= 25]),
+    ba_above_pct = ba_above / sum(PWGTP[AGEP >= 25])
+  )
+
## # A tibble: 8 × 6
+## # Groups:   PUMA [4]
+##   PUMA  SEX_label total_pop mean_age ba_above ba_above_pct
+##   <chr> <ord>         <dbl>    <dbl>    <dbl>        <dbl>
+## 1 00100 Male         108732     38.2    34113        0.469
+## 2 00100 Female       111564     40.4    36873        0.473
+## 3 00200 Male          73768     41.4    15831        0.303
+## 4 00200 Female        73416     43.6    20248        0.367
+## 5 00300 Male          61601     43.7    14869        0.326
+## 6 00300 Female        64806     45.0    21527        0.434
+## 7 00400 Male          65675     41.9    12788        0.278
+## 8 00400 Female        66737     44.6    18980        0.391

Calculating standard errors @@ -379,18 +379,18 @@

Calculating standard errors
-vt_pums_rep_weights <- get_pums(
-  variables = c("PUMA", "SEX", "AGEP", "SCHL"),
-  state = "VT",
-  survey = "acs1",
-  year = 2018,
-  recode = TRUE,
-  rep_weights = "person"
-  )

+vt_pums_rep_weights <- get_pums(
+  variables = c("PUMA", "SEX", "AGEP", "SCHL"),
+  state = "VT",
+  survey = "acs1",
+  year = 2018,
+  recode = TRUE,
+  rep_weights = "person"
+  )

To easily convert this data frame to a survey or srvyr object, we can use the to_survey() function.

-vt_survey_design <- to_survey(vt_pums_rep_weights)
+vt_survey_design <- to_survey(vt_pums_rep_weights)

By default, to_survey() converts a data frame to a tbl_svy object by using person replicate weights. You can change the arguments in to_survey if you are analyzing @@ -402,61 +402,61 @@
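For instance, a housing-unit analysis would request housing replicate weights from get_pums() and then point to_survey() at them. The sketch below is illustrative only: the rep_weights = "housing" and type = "housing" values reflect the documented person/housing options, and BDSP (number of bedrooms) is simply an example housing-level variable.

# sketch: build a housing-unit design instead of a person-level design
vt_hu_rep_weights <- get_pums(
  variables = c("PUMA", "BDSP"),
  state = "VT",
  survey = "acs1",
  year = 2018,
  rep_weights = "housing"
)

vt_hu_design <- to_survey(vt_hu_rep_weights, type = "housing")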

Calculating standard errors
-library(srvyr, warn.conflicts = FALSE)
-
-vt_survey_design %>% 
-  survey_count(PUMA, SEX_label)
-
## # A tibble: 8 × 4
-##   PUMA  SEX_label      n  n_se
-##   <chr> <ord>      <dbl> <dbl>
-## 1 00100 Male      108732 1123.
-## 2 00100 Female    111564 1360.
-## 3 00200 Male       73768  754.
-## 4 00200 Female     73416  809.
-## 5 00300 Male       61601  491.
-## 6 00300 Female     64806  578.
-## 7 00400 Male       65675  702.
-## 8 00400 Female     66737  776.
+library(srvyr, warn.conflicts = FALSE)
+
+vt_survey_design %>% 
+  survey_count(PUMA, SEX_label)
+
## # A tibble: 8 × 4
+##   PUMA  SEX_label      n  n_se
+##   <chr> <ord>      <dbl> <dbl>
+## 1 00100 Male      108732 1123.
+## 2 00100 Female    111564 1360.
+## 3 00200 Male       73768  754.
+## 4 00200 Female     73416  809.
+## 5 00300 Male       61601  491.
+## 6 00300 Female     64806  578.
+## 7 00400 Male       65675  702.
+## 8 00400 Female     66737  776.

The srvyr syntax is very similar to standard dplyr syntax, so this should look familiar; we’ve swapped out count() for survey_count() and we don’t need a wt argument because we defined the weights when we set up the srvyr object.

The equivalent estimate using survey syntax looks like this:

-survey::svyby(~SEX_label, ~PUMA, design = vt_survey_design, survey::svytotal)
-
##        PUMA SEX_labelMale SEX_labelFemale       se1       se2
-## 00100 00100        108732          111564 1122.9765 1360.3476
-## 00200 00200         73768           73416  754.0906  808.7364
-## 00300 00300         61601           64806  490.5432  578.3529
-## 00400 00400         65675           66737  702.0659  775.9812
+survey::svyby(~SEX_label, ~PUMA, design = vt_survey_design, survey::svytotal)
+
##        PUMA SEX_labelMale SEX_labelFemale       se1       se2
+## 00100 00100        108732          111564 1122.9765 1360.3476
+## 00200 00200         73768           73416  754.0906  808.7364
+## 00300 00300         61601           64806  490.5432  578.3529
+## 00400 00400         65675           66737  702.0659  775.9812

We can also repeat the estimate we did above and calculate the percentage of people that are 25 and up with a bachelor’s degree, while this time returning the upper and lower bounds of the confidence interval for these estimates. This time though, we have to subset our data frame to only those 25 and older before we summarize.

-vt_survey_design %>% 
-  mutate(ba_above = SCHL %in% c("21", "22", "23", "24")) %>% 
-  filter(AGEP >= 25) %>% 
-  group_by(PUMA, SEX_label) %>% 
-  summarize(
-    age_25_up = survey_total(vartype = "ci"),
-    ba_above_n = survey_total(ba_above, vartype = "ci"),
-    ba_above_pct = survey_mean(ba_above, vartype = "ci")
-    )
-
## # A tibble: 8 × 11
-## # Groups:   PUMA [4]
-##   PUMA  SEX_label age_25_up age_25_up_low age_25_up_upp ba_above_n ba_above_n_low ba_above_n_upp
-##   <chr> <ord>         <dbl>         <dbl>         <dbl>      <dbl>          <dbl>          <dbl>
-## 1 00100 Male          72680        70216.        75144.      34113         29913.         38313.
-## 2 00100 Female        77966        75671.        80261.      36873         32202.         41544.
-## 3 00200 Male          52278        50826.        53730.      15831         13327.         18335.
-## 4 00200 Female        55162        53643.        56681.      20248         17679.         22817.
-## 5 00300 Male          45634        44743.        46525.      14869         12638.         17100.
-## 6 00300 Female        49546        48576.        50516.      21527         19010.         24044.
-## 7 00400 Male          45960        45067.        46853.      12788         10699.         14877.
-## 8 00400 Female        48601        47783.        49419.      18980         16540.         21420.
-## # ℹ 3 more variables: ba_above_pct <dbl>, ba_above_pct_low <dbl>, ba_above_pct_upp <dbl>
+vt_survey_design %>% 
+  mutate(ba_above = SCHL %in% c("21", "22", "23", "24")) %>% 
+  filter(AGEP >= 25) %>% 
+  group_by(PUMA, SEX_label) %>% 
+  summarize(
+    age_25_up = survey_total(vartype = "ci"),
+    ba_above_n = survey_total(ba_above, vartype = "ci"),
+    ba_above_pct = survey_mean(ba_above, vartype = "ci")
+    )
+
## # A tibble: 8 × 11
+## # Groups:   PUMA [4]
+##   PUMA  SEX_label age_25_up age_25_up_low age_25_up_upp ba_above_n ba_above_n_low ba_above_n_upp
+##   <chr> <ord>         <dbl>         <dbl>         <dbl>      <dbl>          <dbl>          <dbl>
+## 1 00100 Male          72680        70216.        75144.      34113         29913.         38313.
+## 2 00100 Female        77966        75671.        80261.      36873         32202.         41544.
+## 3 00200 Male          52278        50826.        53730.      15831         13327.         18335.
+## 4 00200 Female        55162        53643.        56681.      20248         17679.         22817.
+## 5 00300 Male          45634        44743.        46525.      14869         12638.         17100.
+## 6 00300 Female        49546        48576.        50516.      21527         19010.         24044.
+## 7 00400 Male          45960        45067.        46853.      12788         10699.         14877.
+## 8 00400 Female        48601        47783.        49419.      18980         16540.         21420.
+## # ℹ 3 more variables: ba_above_pct <dbl>, ba_above_pct_low <dbl>, ba_above_pct_upp <dbl>

Modeling with PUMS data @@ -467,85 +467,85 @@

Modeling with PUMS data
-vt_pums_to_model <- get_pums(
-  variables = c("PUMA", "WAGP", "JWMNP", "JWTR", "COW", "ESR"),
-  state = "VT",
-  survey = "acs5",
-  year = 2018,
-  rep_weights = "person"
-  )
+vt_pums_to_model <- get_pums(
+  variables = c("PUMA", "WAGP", "JWMNP", "JWTR", "COW", "ESR"),
+  state = "VT",
+  survey = "acs5",
+  year = 2018,
+  rep_weights = "person"
+  )

Now, we filter out observations that aren’t relevant, do a little recoding of the class of worker variable, and finally convert the data frame to a survey design object.

-vt_model_sd <- vt_pums_to_model %>% 
-  filter(
-    ESR == 1,   # civilian employed
-    JWTR != 11, # does not work at home
-    WAGP > 0,   # earned wages last year
-    JWMNP > 0   # commute more than zero min
-    ) %>%
-  mutate(
-    emp_type = case_when(
-      COW %in% c("1", "2")      ~ "private",
-      COW %in% c("3", "4", "5") ~ "public",
-      TRUE                      ~ "self"
-      )
-    ) %>%
-  to_survey()
+vt_model_sd <- vt_pums_to_model %>% + filter( + ESR == 1, # civilian employed + JWTR != 11, # does not work at home + WAGP > 0, # earned wages last year + JWMNP > 0 # commute more than zero min + ) %>% + mutate( + emp_type = case_when( + COW %in% c("1", "2") ~ "private", + COW %in% c("3", "4", "5") ~ "public", + TRUE ~ "self" + ) + ) %>% + to_survey()

Let’s quickly check out some summary stats using srvyr.

-vt_model_sd %>% 
-  summarize(
-    n              = survey_total(1),
-    mean_wage      = survey_mean(WAGP),
-    median_wage    = survey_median(WAGP),
-    mean_commute   = survey_mean(JWMNP),
-    median_commute = survey_median(JWMNP)
-    )
-
## # A tibble: 1 × 10
-##        n  n_se mean_wage mean_wage_se median_wage median_wage_se mean_commute mean_commute_se
-##    <dbl> <dbl>     <dbl>        <dbl>       <dbl>          <dbl>        <dbl>           <dbl>
-## 1 282733 1933.    44601.         437.       35000           251.         23.3           0.233
-## # ℹ 2 more variables: median_commute <dbl>, median_commute_se <dbl>
+vt_model_sd %>% + summarize( + n = survey_total(1), + mean_wage = survey_mean(WAGP), + median_wage = survey_median(WAGP), + mean_commute = survey_mean(JWMNP), + median_commute = survey_median(JWMNP) + ) +
## # A tibble: 1 × 10
+##        n  n_se mean_wage mean_wage_se median_wage median_wage_se mean_commute mean_commute_se
+##    <dbl> <dbl>     <dbl>        <dbl>       <dbl>          <dbl>        <dbl>           <dbl>
+## 1 282733 1933.    44601.         437.       35000           251.         23.3           0.233
+## # ℹ 2 more variables: median_commute <dbl>, median_commute_se <dbl>
-vt_model_sd %>% 
-  survey_count(emp_type)
-
## # A tibble: 3 × 3
-##   emp_type      n  n_se
-##   <chr>     <dbl> <dbl>
-## 1 private  224760 2001.
-## 2 public    42966 1374.
-## 3 self      15007  756.
+vt_model_sd %>% + survey_count(emp_type) +
## # A tibble: 3 × 3
+##   emp_type      n  n_se
+##   <chr>     <dbl> <dbl>
+## 1 private  224760 2001.
+## 2 public    42966 1374.
+## 3 self      15007  756.

And now we’re ready to fit a simple linear regression model.

-model <- survey::svyglm(log(JWMNP) ~ log(WAGP) + emp_type + PUMA, design = vt_model_sd)
-summary(model)
-
## 
-## Call:
-## survey::svyglm(formula = log(JWMNP) ~ log(WAGP) + emp_type + 
-##     PUMA, design = vt_model_sd)
-## 
-## Survey design:
-## Called via srvyr
-## 
-## Coefficients:
-##                 Estimate Std. Error t value Pr(>|t|)    
-## (Intercept)     1.699601   0.102181  16.633  < 2e-16 ***
-## log(WAGP)       0.113913   0.009548  11.931  < 2e-16 ***
-## emp_typepublic -0.052845   0.027420  -1.927   0.0578 .  
-## emp_typeself   -0.306227   0.050016  -6.123 4.20e-08 ***
-## PUMA00200       0.007800   0.023943   0.326   0.7455    
-## PUMA00300       0.023629   0.025111   0.941   0.3498    
-## PUMA00400      -0.117300   0.026028  -4.507 2.46e-05 ***
-## ---
-## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-## 
-## (Dispersion parameter for gaussian family taken to be 9328.921)
-## 
-## Number of Fisher Scoring iterations: 2
+model <- survey::svyglm(log(JWMNP) ~ log(WAGP) + emp_type + PUMA, design = vt_model_sd) +summary(model) +
## 
+## Call:
+## survey::svyglm(formula = log(JWMNP) ~ log(WAGP) + emp_type + 
+##     PUMA, design = vt_model_sd)
+## 
+## Survey design:
+## Called via srvyr
+## 
+## Coefficients:
+##                 Estimate Std. Error t value Pr(>|t|)    
+## (Intercept)     1.699601   0.102181  16.633  < 2e-16 ***
+## log(WAGP)       0.113913   0.009548  11.931  < 2e-16 ***
+## emp_typepublic -0.052845   0.027420  -1.927   0.0578 .  
+## emp_typeself   -0.306227   0.050016  -6.123 4.20e-08 ***
+## PUMA00200       0.007800   0.023943   0.326   0.7455    
+## PUMA00300       0.023629   0.025111   0.941   0.3498    
+## PUMA00400      -0.117300   0.026028  -4.507 2.46e-05 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## (Dispersion parameter for gaussian family taken to be 9328.921)
+## 
+## Number of Fisher Scoring iterations: 2

Mapping PUMS data @@ -555,40 +555,40 @@

Mapping PUMS data
-ne_states <- c("VT", "NH", "ME", "MA", "CT", "RI")
-ne_pumas <- map(ne_states, tigris::pumas, class = "sf", cb = TRUE, year = 2018) %>% 
-  reduce(rbind)

+ne_states <- c("VT", "NH", "ME", "MA", "CT", "RI") +ne_pumas <- map(ne_states, tigris::pumas, class = "sf", cb = TRUE, year = 2018) %>% + reduce(rbind)

Next we download the income-to-poverty ratio from the PUMS dataset and calculate the percentage of population below 200% of the poverty line for each PUMA.

-ne_pums <- get_pums(
-  variables = c("PUMA", "POVPIP"),
-  state = ne_states,
-  survey = "acs1",
-  year = 2018
-  )
-
-ne_pov <- ne_pums %>%
-  group_by(ST, PUMA) %>%
-  summarize(
-    total_pop = sum(PWGTP),
-    pct_in_pov = sum(PWGTP[POVPIP < 200]) / total_pop
-  )
+ne_pums <- get_pums( + variables = c("PUMA", "POVPIP"), + state = ne_states, + survey = "acs1", + year = 2018 + ) + +ne_pov <- ne_pums %>% + group_by(ST, PUMA) %>% + summarize( + total_pop = sum(PWGTP), + pct_in_pov = sum(PWGTP[POVPIP < 200]) / total_pop + )

And now we can make a choropleth map by joining the PUMA boundaries with the PUMS data.

-ne_pumas %>%
-  left_join(ne_pov, by = c("STATEFP10" = "ST", "PUMACE10" = "PUMA")) %>%
-  ggplot(aes(fill = pct_in_pov)) +
-  geom_sf() +
-  scale_fill_viridis_b(
-    name = NULL,
-    option = "magma",
-    labels = scales::label_percent(1)
-    ) +
-  labs(title = "Percentage of population below 200% of the poverty line") +
-  theme_void()
+ne_pumas %>% + left_join(ne_pov, by = c("STATEFP10" = "ST", "PUMACE10" = "PUMA")) %>% + ggplot(aes(fill = pct_in_pov)) + + geom_sf() + + scale_fill_viridis_b( + name = NULL, + option = "magma", + labels = scales::label_percent(1) + ) + + labs(title = "Percentage of population below 200% of the poverty line") + + theme_void()

@@ -633,57 +633,57 @@

Verification of PUMS estimates
-wy_relp <- get_pums(
-  variables = "RELP",
-  state = "Wyoming",
-  survey = "acs1",
-  year = 2018,
-  rep_weights = "person"
-)
-
-ut_ten <- get_pums(
-  variables = "TEN",
-  state = "Utah",
-  survey = "acs1",
-  year = 2018,
-  rep_weights = "housing"
-)
-
-hi_age <- get_pums(
-  variables = "AGEP",
-  state = "Hawaii",
-  survey = "acs1",
-  year = 2018,
-  rep_weights = "person"
-)

+wy_relp <- get_pums( + variables = "RELP", + state = "Wyoming", + survey = "acs1", + year = 2018, + rep_weights = "person" +) + +ut_ten <- get_pums( + variables = "TEN", + state = "Utah", + survey = "acs1", + year = 2018, + rep_weights = "housing" +) + +hi_age <- get_pums( + variables = "AGEP", + state = "Hawaii", + survey = "acs1", + year = 2018, + rep_weights = "person" +)
-wy_relp %>% 
-  to_survey() %>% 
-  survey_count(RELP) %>% 
-  filter(RELP == "16")
-
## # A tibble: 1 × 3
-##   RELP      n  n_se
-##   <chr> <dbl> <dbl>
-## 1 16     7018  2.64
+wy_relp %>% + to_survey() %>% + survey_count(RELP) %>% + filter(RELP == "16") +
## # A tibble: 1 × 3
+##   RELP      n  n_se
+##   <chr> <dbl> <dbl>
+## 1 16     7018  2.64
-ut_ten %>% 
-  distinct(SERIALNO, .keep_all = TRUE) %>%
-  to_survey(type = "housing") %>% 
-  survey_count(TEN) %>% 
-  filter(TEN == 2)
-
## # A tibble: 1 × 3
-##   TEN        n  n_se
-##   <chr>  <dbl> <dbl>
-## 1 2     209632 3972.
+ut_ten %>% + distinct(SERIALNO, .keep_all = TRUE) %>% + to_survey(type = "housing") %>% + survey_count(TEN) %>% + filter(TEN == 2) +
## # A tibble: 1 × 3
+##   TEN        n  n_se
+##   <chr>  <dbl> <dbl>
+## 1 2     209632 3972.
-hi_age %>% 
-  filter(between(AGEP, 0, 4)) %>% 
-  to_survey() %>% 
-  summarize(age_0_4 = survey_total(1))
-
## # A tibble: 1 × 2
-##   age_0_4 age_0_4_se
-##     <dbl>      <dbl>
-## 1   86452       944.
+hi_age %>% + filter(between(AGEP, 0, 4)) %>% + to_survey() %>% + summarize(age_0_4 = survey_total(1)) +
## # A tibble: 1 × 2
+##   age_0_4 age_0_4_se
+##     <dbl>      <dbl>
+## 1   86452       944.

3 for 3 – yay!
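As one extra, optional check, the PUMS-derived under-5 total for Hawaii can also be compared against the published 1-year ACS tables with a quick get_acs() call. This is a sketch rather than part of the original workflow; it assumes B01001_003 (male under 5 years) and B01001_027 (female under 5 years) are the relevant variable IDs in table B01001.

hi_age_published <- get_acs(
  geography = "state",
  variables = c(male_0_4 = "B01001_003", female_0_4 = "B01001_027"),
  state = "HI",
  survey = "acs1",
  year = 2018
)

# summing the two published estimates should land close to the PUMS-derived total above
sum(hi_age_published$estimate)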

diff --git a/docs/articles/pums-data_files/figure-html/unnamed-chunk-23-1.png b/docs/articles/pums-data_files/figure-html/unnamed-chunk-23-1.png index 4fbeb77..570de5f 100644 Binary files a/docs/articles/pums-data_files/figure-html/unnamed-chunk-23-1.png and b/docs/articles/pums-data_files/figure-html/unnamed-chunk-23-1.png differ diff --git a/docs/articles/spatial-data.html b/docs/articles/spatial-data.html index fc66a07..12fd817 100644 --- a/docs/articles/spatial-data.html +++ b/docs/articles/spatial-data.html @@ -40,7 +40,7 @@ tidycensus - 1.5 + 1.6.1
@@ -124,39 +124,39 @@

Spatial data in tidycensus

The following example shows median household income from the 2016-2020 ACS for Census tracts in Orange County, California:

-library(tidycensus)
-library(tidyverse)
-options(tigris_use_cache = TRUE)
-
-orange <- get_acs(
-  state = "CA",
-  county = "Orange",
-  geography = "tract",
-  variables = "B19013_001",
-  geometry = TRUE,
-  year = 2020
-)
-
-head(orange)
-
## Simple feature collection with 6 features and 5 fields
-## Geometry type: MULTIPOLYGON
-## Dimension:     XY
-## Bounding box:  xmin: -118.0369 ymin: 33.69354 xmax: -117.7822 ymax: 33.85749
-## Geodetic CRS:  NAD83
-##         GEOID                                            NAME   variable
-## 1 06059086701  Census Tract 867.01, Orange County, California B19013_001
-## 2 06059075901  Census Tract 759.01, Orange County, California B19013_001
-## 3 06059075303  Census Tract 753.03, Orange County, California B19013_001
-## 4 06059052527  Census Tract 525.27, Orange County, California B19013_001
-## 5 06059110109 Census Tract 1101.09, Orange County, California B19013_001
-## 6 06059087106  Census Tract 871.06, Orange County, California B19013_001
-##   estimate   moe                       geometry
-## 1    86922 11391 MULTIPOLYGON (((-117.9762 3...
-## 2    78846 10972 MULTIPOLYGON (((-117.8618 3...
-## 3   123654 21900 MULTIPOLYGON (((-117.8824 3...
-## 4   135097 10971 MULTIPOLYGON (((-117.8035 3...
-## 5   107463 12665 MULTIPOLYGON (((-118.0369 3...
-## 6    45327  8700 MULTIPOLYGON (((-117.9414 3...
+library(tidycensus) +library(tidyverse) +options(tigris_use_cache = TRUE) + +orange <- get_acs( + state = "CA", + county = "Orange", + geography = "tract", + variables = "B19013_001", + geometry = TRUE, + year = 2020 +) + +head(orange) +
## Simple feature collection with 6 features and 5 fields
+## Geometry type: MULTIPOLYGON
+## Dimension:     XY
+## Bounding box:  xmin: -118.0096 ymin: 33.77397 xmax: -117.7905 ymax: 33.93992
+## Geodetic CRS:  NAD83
+##         GEOID                                            NAME   variable
+## 1 06059110603 Census Tract 1106.03, Orange County, California B19013_001
+## 2 06059011503  Census Tract 115.03, Orange County, California B19013_001
+## 3 06059001102   Census Tract 11.02, Orange County, California B19013_001
+## 4 06059021812  Census Tract 218.12, Orange County, California B19013_001
+## 5 06059001301   Census Tract 13.01, Orange County, California B19013_001
+## 6 06059088701  Census Tract 887.01, Orange County, California B19013_001
+##   estimate   moe                       geometry
+## 1    56563 13103 MULTIPOLYGON (((-118.0096 3...
+## 2   101800 10306 MULTIPOLYGON (((-117.8984 3...
+## 3    99286 18207 MULTIPOLYGON (((-117.9765 3...
+## 4   133494  8958 MULTIPOLYGON (((-117.8184 3...
+## 5    75994 18045 MULTIPOLYGON (((-117.9766 3...
+## 6    54759  7682 MULTIPOLYGON (((-117.9673 3...

Our object orange looks much like the basic tidycensus output, but with a geometry list-column describing the geometry of each feature, using the @@ -169,10 +169,10 @@
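If you want to confirm this structure yourself, a few quick inspection calls (a sketch, assuming the sf package is attached) show the sf class, the coordinate reference system, and the geometry list-column:

library(sf)

class(orange)        # "sf" plus tibble/data.frame classes
st_crs(orange)       # NAD83 geographic coordinates
st_geometry(orange)  # the geometry list-column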

Spatial data in tidycensus

the geom_sf functionality in ggplot2:

-orange %>%
-  ggplot(aes(fill = estimate)) + 
-  geom_sf(color = NA) + 
-  scale_fill_viridis_c(option = "magma") 
+orange %>% + ggplot(aes(fill = estimate)) + + geom_sf(color = NA) + + scale_fill_viridis_c(option = "magma")

Please note that the UTM Zone 11N coordinate system (26911) is appropriate for Southern California but may not @@ -195,37 +195,37 @@
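If you want to try that suggestion, a minimal sketch (reusing the orange object from above and assuming the sf package is installed) re-projects the plot with coord_sf():

orange %>%
  ggplot(aes(fill = estimate)) + 
  geom_sf(color = NA) + 
  coord_sf(crs = sf::st_crs(26911)) + 
  scale_fill_viridis_c(option = "magma")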

Faceted mapping
-racevars <- c(White = "P2_005N", 
-              Black = "P2_006N", 
-              Asian = "P2_008N", 
-              Hispanic = "P2_002N")
-
-harris <- get_decennial(
-  geography = "tract",
-  variables = racevars,
-  state = "TX",
-  county = "Harris County",
-  geometry = TRUE,
-  summary_var = "P2_001N",
-  year = 2020,
-  sumfile = "pl"
-) 
-
-head(harris)
-
## Simple feature collection with 6 features and 5 fields
-## Geometry type: MULTIPOLYGON
-## Dimension:     XY
-## Bounding box:  xmin: -95.46502 ymin: 29.53424 xmax: -95.09005 ymax: 29.96492
-## Geodetic CRS:  NAD83
-## # A tibble: 6 × 6
-##   GEOID       NAME        variable value summary_value                  geometry
-##   <chr>       <chr>       <chr>    <dbl>         <dbl>        <MULTIPOLYGON [°]>
-## 1 48201341203 Census Tra… White     1503          2355 (((-95.10641 29.54594, -…
-## 2 48201341203 Census Tra… Black      177          2355 (((-95.10641 29.54594, -…
-## 3 48201341203 Census Tra… Asian       54          2355 (((-95.10641 29.54594, -…
-## 4 48201341203 Census Tra… Hispanic   492          2355 (((-95.10641 29.54594, -…
-## 5 48201550601 Census Tra… White      265          6673 (((-95.46502 29.96456, -…
-## 6 48201550601 Census Tra… Black     2156          6673 (((-95.46502 29.96456, -…
+racevars <- c(White = "P2_005N", + Black = "P2_006N", + Asian = "P2_008N", + Hispanic = "P2_002N") + +harris <- get_decennial( + geography = "tract", + variables = racevars, + state = "TX", + county = "Harris County", + geometry = TRUE, + summary_var = "P2_001N", + year = 2020, + sumfile = "pl" +) + +head(harris) +
## Simple feature collection with 6 features and 5 fields
+## Geometry type: MULTIPOLYGON
+## Dimension:     XY
+## Bounding box:  xmin: -95.51535 ymin: 29.80887 xmax: -95.3994 ymax: 29.92537
+## Geodetic CRS:  NAD83
+## # A tibble: 6 × 6
+##   GEOID       NAME        variable value summary_value                  geometry
+##   <chr>       <chr>       <chr>    <dbl>         <dbl>        <MULTIPOLYGON [°]>
+## 1 48201530200 Census Tra… White     2057          3766 (((-95.45086 29.81984, -…
+## 2 48201530200 Census Tra… Black      127          3766 (((-95.45086 29.81984, -…
+## 3 48201530200 Census Tra… Asian      239          3766 (((-95.45086 29.81984, -…
+## 4 48201530200 Census Tra… Hispanic  1154          3766 (((-95.45086 29.81984, -…
+## 5 48201534002 Census Tra… White      388          5653 (((-95.51398 29.92533, -…
+## 6 48201534002 Census Tra… Black      685          5653 (((-95.51398 29.92533, -…

We notice that there are four entries for each Census tract, with each entry representing one of our requested variables. The summary_value column represents the value of the summary @@ -237,14 +237,14 @@

Faceted mapping
-harris %>%
-  mutate(percent = 100 * (value / summary_value)) %>%
-  ggplot(aes(fill = percent)) +
-  facet_wrap(~variable) +
-  geom_sf(color = NA) +
-  theme_void() + 
-  scale_fill_viridis_c() + 
-  labs(fill = "% of population\n(2020 Census)")
+harris %>% + mutate(percent = 100 * (value / summary_value)) %>% + ggplot(aes(fill = percent)) + + facet_wrap(~variable) + + geom_sf(color = NA) + + theme_void() + + scale_fill_viridis_c() + + labs(fill = "% of population\n(2020 Census)")

@@ -261,21 +261,21 @@

Detailed shoreline example, take this example of median household income by Census tract in Manhattan (New York County), NY:

-library(tidycensus)
-library(tidyverse)
-options(tigris_use_cache = TRUE)
-
-ny <- get_acs(geography = "tract", 
-              variables = "B19013_001", 
-              state = "NY", 
-              county = "New York", 
-              year = 2020, 
-              geometry = TRUE)
-
-ggplot(ny, aes(fill = estimate)) + 
-  geom_sf() + 
-  theme_void() + 
-  scale_fill_viridis_c(labels = scales::dollar)
+library(tidycensus) +library(tidyverse) +options(tigris_use_cache = TRUE) + +ny <- get_acs(geography = "tract", + variables = "B19013_001", + state = "NY", + county = "New York", + year = 2020, + geometry = TRUE) + +ggplot(ny, aes(fill = estimate)) + + geom_sf() + + theme_void() + + scale_fill_viridis_c(labels = scales::dollar)

As illustrated in the graphic, the boundaries of Manhattan include water area stretching into the Hudson and East Rivers. In turn, @@ -292,25 +292,158 @@

Detailed shoreline first transformed to a projected coordinate reference system to improve performance.

-library(tigris)
-library(sf)
-
-ny_erase <- get_acs(
-  geography = "tract",
-  variables = "B19013_001",
-  state = "NY",
-  county = "New York",
-  year = 2020,
-  geometry = TRUE,
-  cb = FALSE
-) %>%
-  st_transform(26918) %>%
-  erase_water(year = 2020)
-
-ggplot(ny_erase, aes(fill = estimate)) + 
-  geom_sf() + 
-  theme_void() + 
-  scale_fill_viridis_c(labels = scales::dollar)
+library(tigris) +library(sf) + +ny_erase <- get_acs( + geography = "tract", + variables = "B19013_001", + state = "NY", + county = "New York", + year = 2020, + geometry = TRUE, + cb = FALSE +) %>% + st_transform(26918) %>% + erase_water(year = 2020) +
+ggplot(ny_erase, aes(fill = estimate)) + 
+  geom_sf() + 
+  theme_void() + 
+  scale_fill_viridis_c(labels = scales::dollar)

The map appears as before, but the polygons now hug the shoreline of Manhattan. Setting the same year in @@ -325,9 +458,9 @@

Writing to shapefiles with the st_write function in the sf package:

-
-library(sf)
-st_write(orange, "orange.shp")
+
+library(sf)
+st_write(orange, "orange.shp")

Your tidycensus-obtained dataset can now be used in ArcGIS, QGIS, Tableau, or any other application that reads shapefiles.

diff --git a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-2-1.png b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-2-1.png index 8efe14c..c8ffd65 100644 Binary files a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-2-1.png and b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-2-1.png differ diff --git a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-4-1.png index 20d0745..ef83408 100644 Binary files a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-5-1.png index 3fe76a9..38df762 100644 Binary files a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-6-1.png index 7dd9348..fb94b56 100644 Binary files a/docs/articles/spatial-data_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/spatial-data_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/authors.html b/docs/authors.html index 8ef37ea..e410b18 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1 @@ -97,15 +97,15 @@

Citation

-

Walker K, Herman M (2023). +

Walker K, Herman M (2024). tidycensus: Load US Census Boundary and Attribute Data as 'tidyverse' and 'sf'-Ready Data Frames. -R package version 1.5, https://walker-data.com/tidycensus/. +R package version 1.6.1, https://walker-data.com/tidycensus/.

@Manual{,
   title = {tidycensus: Load US Census Boundary and Attribute Data as 'tidyverse' and 'sf'-Ready Data Frames},
   author = {Kyle Walker and Matt Herman},
-  year = {2023},
-  note = {R package version 1.5},
+  year = {2024},
+  note = {R package version 1.6.1},
   url = {https://walker-data.com/tidycensus/},
 }
diff --git a/docs/index.html b/docs/index.html index 6c1f644..795118d 100644 --- a/docs/index.html +++ b/docs/index.html @@ -44,7 +44,7 @@ tidycensus - 1.5 + 1.6.1 @@ -112,7 +112,7 @@

R build status CRAN BadgeCRAN Downloads

tidycensus is an R package that allows users to interface with a select number of the US Census Bureau’s data APIs and return tidyverse-ready data frames, optionally with simple feature geometry included. Install from CRAN with the following command:

-install.packages("tidycensus")
+install.packages("tidycensus")

tidycensus is designed to help R users get Census data that is pre-prepared for exploration within the tidyverse, and optionally spatially with sf. To learn more about how the package works, please read through the following articles:

-
data(acs5_geography)
+
data(acs5_geography)

Format

-

An object of class tbl_df (inherits from tbl, data.frame) with 11112 rows and 3 columns.

+

An object of class tbl_df (inherits from tbl, data.frame) with 12228 rows and 3 columns.

Details

diff --git a/docs/reference/as_dot_density.html b/docs/reference/as_dot_density.html index 50480d5..214ee58 100644 --- a/docs/reference/as_dot_density.html +++ b/docs/reference/as_dot_density.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,15 +81,15 @@

Convert polygon geometry to dots for dot-density mapping

-
as_dot_density(
-  input_data,
-  value,
-  values_per_dot,
-  group = NULL,
-  erase_water = FALSE,
-  area_threshold = NULL,
-  water_year = 2020
-)
+
as_dot_density(
+  input_data,
+  value,
+  values_per_dot,
+  group = NULL,
+  erase_water = FALSE,
+  area_threshold = NULL,
+  water_year = 2020
+)
@@ -136,51 +136,51 @@

Details

Examples

-
if (FALSE) {
-
-library(tidycensus)
-library(ggplot2)
-
-# Identify variables for mapping
-race_vars <- c(
-  Hispanic = "P2_002N",
-  White = "P2_005N",
-  Black = "P2_006N",
-  Asian = "P2_008N"
-)
-
-# Get data from tidycensus
-baltimore_race <- get_decennial(
-  geography = "tract",
-  variables = race_vars,
-  state = "MD",
-  county = "Baltimore city",
-  geometry = TRUE,
-  year = 2020
-)
-
-# Convert data to dots
-baltimore_dots <- as_dot_density(
-  baltimore_race,
-  value = "value",
-  values_per_dot = 100,
-  group = "variable"
-)
-
-# Use one set of polygon geometries as a base layer
-baltimore_base <- baltimore_race[baltimore_race$variable == "Hispanic", ]
-
-# Map with ggplot2
-ggplot() +
-  geom_sf(data = baltimore_base,
-          fill = "white",
-          color = "grey") +
-  geom_sf(data = baltimore_dots,
-          aes(color = variable),
-          size = 0.01) +
-  theme_void()
-
-}
+    
if (FALSE) {
+
+library(tidycensus)
+library(ggplot2)
+
+# Identify variables for mapping
+race_vars <- c(
+  Hispanic = "P2_002N",
+  White = "P2_005N",
+  Black = "P2_006N",
+  Asian = "P2_008N"
+)
+
+# Get data from tidycensus
+baltimore_race <- get_decennial(
+  geography = "tract",
+  variables = race_vars,
+  state = "MD",
+  county = "Baltimore city",
+  geometry = TRUE,
+  year = 2020
+)
+
+# Convert data to dots
+baltimore_dots <- as_dot_density(
+  baltimore_race,
+  value = "value",
+  values_per_dot = 100,
+  group = "variable"
+)
+
+# Use one set of polygon geometries as a base layer
+baltimore_base <- baltimore_race[baltimore_race$variable == "Hispanic", ]
+
+# Map with ggplot2
+ggplot() +
+  geom_sf(data = baltimore_base,
+          fill = "white",
+          color = "grey") +
+  geom_sf(data = baltimore_dots,
+          aes(color = variable),
+          size = 0.01) +
+  theme_void()
+
+}
 
diff --git a/docs/reference/census_api_key.html b/docs/reference/census_api_key.html index 8f77715..0fc94cd 100644 --- a/docs/reference/census_api_key.html +++ b/docs/reference/census_api_key.html @@ -21,7 +21,7 @@ tidycensus - 1.5 + 1.6.1
@@ -89,7 +89,7 @@

Install a CENSUS API Key in Your .Renviron File for Repeated Us
-
census_api_key(key, overwrite = FALSE, install = FALSE)
+
census_api_key(key, overwrite = FALSE, install = FALSE)
@@ -109,23 +109,23 @@

Arguments

Examples

-

-if (FALSE) {
-census_api_key("111111abc", install = TRUE)
-# First time, reload your environment so you can use the key without restarting R.
-readRenviron("~/.Renviron")
-# You can check it with:
-Sys.getenv("CENSUS_API_KEY")
-}
-
-if (FALSE) {
-# If you need to overwrite an existing key:
-census_api_key("111111abc", overwrite = TRUE, install = TRUE)
-# First time, relead your environment so you can use the key without restarting R.
-readRenviron("~/.Renviron")
-# You can check it with:
-Sys.getenv("CENSUS_API_KEY")
-}
+    

+if (FALSE) {
+census_api_key("111111abc", install = TRUE)
+# First time, reload your environment so you can use the key without restarting R.
+readRenviron("~/.Renviron")
+# You can check it with:
+Sys.getenv("CENSUS_API_KEY")
+}
+
+if (FALSE) {
+# If you need to overwrite an existing key:
+census_api_key("111111abc", overwrite = TRUE, install = TRUE)
+# First time, reload your environment so you can use the key without restarting R.
+readRenviron("~/.Renviron")
+# You can check it with:
+Sys.getenv("CENSUS_API_KEY")
+}
 
diff --git a/docs/reference/check_ddhca_groups.html b/docs/reference/check_ddhca_groups.html index 7f87062..b69b310 100644 --- a/docs/reference/check_ddhca_groups.html +++ b/docs/reference/check_ddhca_groups.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,7 +81,7 @@

Check to see if a given geography / population group combination is availabl
-
check_ddhca_groups(geography, pop_group, state = NULL, county = NULL)
+
check_ddhca_groups(geography, pop_group, state = NULL, county = NULL)
diff --git a/docs/reference/county_laea.html b/docs/reference/county_laea.html index 5dc261c..576b449 100644 --- a/docs/reference/county_laea.html +++ b/docs/reference/county_laea.html @@ -18,7 +18,7 @@ tidycensus - 1.5 + 1.6.1
@@ -83,9 +83,9 @@

County geometry with Alaska and Hawaii shifted and re-scaled

-
data(county_laea)
-
-data(county_laea)
+
data(county_laea)
+
+data(county_laea)
diff --git a/docs/reference/fips_codes.html b/docs/reference/fips_codes.html index dcb9c42..b959846 100644 --- a/docs/reference/fips_codes.html +++ b/docs/reference/fips_codes.html @@ -26,7 +26,7 @@ tidycensus - 1.5 + 1.6.1
@@ -96,7 +96,7 @@

Dataset with FIPS codes for US states and counties

-
data(fips_codes)
+
data(fips_codes)
diff --git a/docs/reference/get_acs.html b/docs/reference/get_acs.html index 3067fc7..f4b3d29 100644 --- a/docs/reference/get_acs.html +++ b/docs/reference/get_acs.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,26 +81,26 @@

Obtain data and feature geometry for the American Community Survey

-
get_acs(
-  geography,
-  variables = NULL,
-  table = NULL,
-  cache_table = FALSE,
-  year = 2021,
-  output = "tidy",
-  state = NULL,
-  county = NULL,
-  zcta = NULL,
-  geometry = FALSE,
-  keep_geo_vars = FALSE,
-  shift_geo = FALSE,
-  summary_var = NULL,
-  key = NULL,
-  moe_level = 90,
-  survey = "acs5",
-  show_call = FALSE,
-  ...
-)
+
get_acs(
+  geography,
+  variables = NULL,
+  table = NULL,
+  cache_table = FALSE,
+  year = 2022,
+  output = "tidy",
+  state = NULL,
+  county = NULL,
+  zcta = NULL,
+  geometry = FALSE,
+  keep_geo_vars = FALSE,
+  shift_geo = FALSE,
+  summary_var = NULL,
+  key = NULL,
+  moe_level = 90,
+  survey = "acs5",
+  show_call = FALSE,
+  ...
+)
@@ -131,8 +131,8 @@

Arguments

year

The year, or endyear, of the ACS sample. 5-year ACS data is -available from 2009 through 2021; 1-year ACS data is available from 2005 -through 2021, with the exception of 2020. Defaults to 2021.

+available from 2009 through 2022; 1-year ACS data is available from 2005 +through 2022, with the exception of 2020. Defaults to 2022.

output
@@ -219,35 +219,35 @@

Value

Examples

-
if (FALSE) {
-library(tidycensus)
-library(tidyverse)
-library(viridis)
-census_api_key("YOUR KEY GOES HERE")
-
-tarr <- get_acs(geography = "tract", variables = "B19013_001",
-                state = "TX", county = "Tarrant", geometry = TRUE, year = 2020)
-
-ggplot(tarr, aes(fill = estimate, color = estimate)) +
-  geom_sf() +
-  coord_sf(crs = 26914) +
-  scale_fill_viridis(option = "magma") +
-  scale_color_viridis(option = "magma")
-
-
-vt <- get_acs(geography = "county", variables = "B19013_001", state = "VT", year = 2019)
-
-vt %>%
-mutate(NAME = gsub(" County, Vermont", "", NAME)) %>%
- ggplot(aes(x = estimate, y = reorder(NAME, estimate))) +
-  geom_errorbar(aes(xmin = estimate - moe, xmax = estimate + moe), width = 0.3, size = 0.5) +
-  geom_point(color = "red", size = 3) +
-  labs(title = "Household income by county in Vermont",
-       subtitle = "2015-2019 American Community Survey",
-       y = "",
-       x = "ACS estimate (bars represent margin of error)")
-
-}
+    
if (FALSE) {
+library(tidycensus)
+library(tidyverse)
+library(viridis)
+census_api_key("YOUR KEY GOES HERE")
+
+tarr <- get_acs(geography = "tract", variables = "B19013_001",
+                state = "TX", county = "Tarrant", geometry = TRUE, year = 2020)
+
+ggplot(tarr, aes(fill = estimate, color = estimate)) +
+  geom_sf() +
+  coord_sf(crs = 26914) +
+  scale_fill_viridis(option = "magma") +
+  scale_color_viridis(option = "magma")
+
+
+vt <- get_acs(geography = "county", variables = "B19013_001", state = "VT", year = 2019)
+
+vt %>%
+mutate(NAME = gsub(" County, Vermont", "", NAME)) %>%
+ ggplot(aes(x = estimate, y = reorder(NAME, estimate))) +
+  geom_errorbar(aes(xmin = estimate - moe, xmax = estimate + moe), width = 0.3, size = 0.5) +
+  geom_point(color = "red", size = 3) +
+  labs(title = "Household income by county in Vermont",
+       subtitle = "2015-2019 American Community Survey",
+       y = "",
+       x = "ACS estimate (bars represent margin of error)")
+
+}
 
diff --git a/docs/reference/get_decennial.html b/docs/reference/get_decennial.html index a902b96..36d9db3 100644 --- a/docs/reference/get_decennial.html +++ b/docs/reference/get_decennial.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,26 +81,26 @@

Obtain data and feature geometry for the decennial US Census

-
get_decennial(
-  geography,
-  variables = NULL,
-  table = NULL,
-  cache_table = FALSE,
-  year = 2020,
-  sumfile = NULL,
-  state = NULL,
-  county = NULL,
-  geometry = FALSE,
-  output = "tidy",
-  keep_geo_vars = FALSE,
-  shift_geo = FALSE,
-  summary_var = NULL,
-  pop_group = NULL,
-  pop_group_label = FALSE,
-  key = NULL,
-  show_call = FALSE,
-  ...
-)
+
get_decennial(
+  geography,
+  variables = NULL,
+  table = NULL,
+  cache_table = FALSE,
+  year = 2020,
+  sumfile = NULL,
+  state = NULL,
+  county = NULL,
+  geometry = FALSE,
+  output = "tidy",
+  keep_geo_vars = FALSE,
+  shift_geo = FALSE,
+  summary_var = NULL,
+  pop_group = NULL,
+  pop_group_label = FALSE,
+  key = NULL,
+  show_call = FALSE,
+  ...
+)
@@ -213,24 +213,24 @@

Value

Examples

-
if (FALSE) {
-# Plot of race/ethnicity by county in Illinois for 2010
-library(tidycensus)
-library(tidyverse)
-library(viridis)
-census_api_key("YOUR KEY GOES HERE")
-vars10 <- c("P005003", "P005004", "P005006", "P004003")
-
-il <- get_decennial(geography = "county", variables = vars10, year = 2010,
-                    summary_var = "P001001", state = "IL", geometry = TRUE) %>%
-  mutate(pct = 100 * (value / summary_value))
-
-ggplot(il, aes(fill = pct, color = pct)) +
-  geom_sf() +
-  facet_wrap(~variable)
-
-
-}
+    
if (FALSE) {
+# Plot of race/ethnicity by county in Illinois for 2010
+library(tidycensus)
+library(tidyverse)
+library(viridis)
+census_api_key("YOUR KEY GOES HERE")
+vars10 <- c("P005003", "P005004", "P005006", "P004003")
+
+il <- get_decennial(geography = "county", variables = vars10, year = 2010,
+                    summary_var = "P001001", state = "IL", geometry = TRUE) %>%
+  mutate(pct = 100 * (value / summary_value))
+
+ggplot(il, aes(fill = pct, color = pct)) +
+  geom_sf() +
+  facet_wrap(~variable)
+
+
+}
 
diff --git a/docs/reference/get_estimates.html b/docs/reference/get_estimates.html index dd6ebee..647f222 100644 --- a/docs/reference/get_estimates.html +++ b/docs/reference/get_estimates.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,27 +81,27 @@

Get data from the US Census Bureau Population Estimates Program

-
get_estimates(
-  geography = c("us", "region", "division", "state", "county", "county subdivision",
-    "place/balance (or part)", "place", "consolidated city", "place (or part)",
-    "metropolitan statistical area/micropolitan statistical area", "cbsa",
-    "metropolitan division", "combined statistical area"),
-  product = NULL,
-  variables = NULL,
-  breakdown = NULL,
-  breakdown_labels = FALSE,
-  year = 2022,
-  state = NULL,
-  county = NULL,
-  time_series = FALSE,
-  output = "tidy",
-  geometry = FALSE,
-  keep_geo_vars = FALSE,
-  shift_geo = FALSE,
-  key = NULL,
-  show_call = FALSE,
-  ...
-)
+
get_estimates(
+  geography = c("us", "region", "division", "state", "county", "county subdivision",
+    "place/balance (or part)", "place", "consolidated city", "place (or part)",
+    "metropolitan statistical area/micropolitan statistical area", "cbsa",
+    "metropolitan division", "combined statistical area"),
+  product = NULL,
+  variables = NULL,
+  breakdown = NULL,
+  breakdown_labels = FALSE,
+  year = 2022,
+  state = NULL,
+  county = NULL,
+  time_series = FALSE,
+  output = "tidy",
+  geometry = FALSE,
+  keep_geo_vars = FALSE,
+  shift_geo = FALSE,
+  key = NULL,
+  show_call = FALSE,
+  ...
+)
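Since the usage block above can be hard to parse on its own, here is a minimal sketch of a call; the product = "population" argument and the Texas example are illustrative assumptions, not output from this page:

if (FALSE) {
library(tidycensus)

# County-level population estimates for Texas from the 2022 vintage
# (product and year are assumptions; see the argument documentation)
tx_pop <- get_estimates(
  geography = "county",
  product = "population",
  state = "TX",
  year = 2022
)
}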
diff --git a/docs/reference/get_flows.html b/docs/reference/get_flows.html index 709559b..14e5cbc 100644 --- a/docs/reference/get_flows.html +++ b/docs/reference/get_flows.html @@ -20,7 +20,7 @@ tidycensus - 1.5 + 1.6.1
@@ -86,21 +86,21 @@

Obtain data and feature geometry for American Community Survey Migration
-
get_flows(
-  geography,
-  variables = NULL,
-  breakdown = NULL,
-  breakdown_labels = FALSE,
-  year = 2018,
-  output = "tidy",
-  state = NULL,
-  county = NULL,
-  msa = NULL,
-  geometry = FALSE,
-  key = NULL,
-  moe_level = 90,
-  show_call = FALSE
-)
+
get_flows(
+  geography,
+  variables = NULL,
+  breakdown = NULL,
+  breakdown_labels = FALSE,
+  year = 2018,
+  output = "tidy",
+  state = NULL,
+  county = NULL,
+  msa = NULL,
+  geometry = FALSE,
+  key = NULL,
+  moe_level = 90,
+  show_call = FALSE
+)
@@ -206,31 +206,31 @@

Value

Examples

-
if (FALSE) {
-get_flows(
-  geography = "county",
-  state = "VT",
-  county = c("Washington", "Chittenden")
-  )
-
-get_flows(
-  geography = "county subdivision",
-  breakdown = "RACE",
-  breakdown_labels = TRUE,
-  state = "NY",
-  county = "Westchester",
-  output = "wide",
-  year = 2015
-  )
-
-get_flows(
-   geography = "metropolitan statistical area",
-   variables = c("POP1YR", "POP1YRAGO"),
-   geometry = TRUE,
-   output = "wide",
-   show_call = TRUE
-  )
-}
+    
if (FALSE) {
+get_flows(
+  geography = "county",
+  state = "VT",
+  county = c("Washington", "Chittenden")
+  )
+
+get_flows(
+  geography = "county subdivision",
+  breakdown = "RACE",
+  breakdown_labels = TRUE,
+  state = "NY",
+  county = "Westchester",
+  output = "wide",
+  year = 2015
+  )
+
+get_flows(
+   geography = "metropolitan statistical area",
+   variables = c("POP1YR", "POP1YRAGO"),
+   geometry = TRUE,
+   output = "wide",
+   show_call = TRUE
+  )
+}
 
diff --git a/docs/reference/get_pop_groups.html b/docs/reference/get_pop_groups.html index 62070dd..8ce9ecc 100644 --- a/docs/reference/get_pop_groups.html +++ b/docs/reference/get_pop_groups.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,7 +81,7 @@

Get available population groups for a given Decennial Census year and summar
-
get_pop_groups(year, sumfile)
+
get_pop_groups(year, sumfile)
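A short illustrative sketch (the 2020 Detailed DHC-A summary file, "ddhca", is an assumption; any supported year and summary file pair works):

if (FALSE) {
library(tidycensus)

# List the population groups available for the 2020 Detailed DHC-A file
get_pop_groups(year = 2020, sumfile = "ddhca")
}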
diff --git a/docs/reference/get_pums.html b/docs/reference/get_pums.html index bfb7780..28598df 100644 --- a/docs/reference/get_pums.html +++ b/docs/reference/get_pums.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,19 +81,19 @@

Load data from the American Community Survey Public Use Microdata Series API
-
get_pums(
-  variables = NULL,
-  state = NULL,
-  puma = NULL,
-  year = 2021,
-  survey = "acs5",
-  variables_filter = NULL,
-  rep_weights = NULL,
-  recode = FALSE,
-  return_vacant = FALSE,
-  show_call = FALSE,
-  key = NULL
-)
+
get_pums(
+  variables = NULL,
+  state = NULL,
+  puma = NULL,
+  year = 2022,
+  survey = "acs5",
+  variables_filter = NULL,
+  rep_weights = NULL,
+  recode = FALSE,
+  return_vacant = FALSE,
+  show_call = FALSE,
+  key = NULL
+)
@@ -116,7 +116,7 @@

Arguments

year

The data year of the 1-year ACS sample or the endyear of the -5-year sample. Defaults to 2020. Please note that 1-year data for 2020 is not available +5-year sample. Defaults to 2022. Please note that 1-year data for 2020 is not available in tidycensus, so users requesting 1-year data should supply a different year.

@@ -143,7 +143,7 @@

Arguments

recode

If TRUE, recodes variable values using Census data dictionary and creates a new *_label column for each variable that is recoded. -Available for 2017 - 2021 data. Defaults to FALSE.

+Available for 2017 - 2022 data. Defaults to FALSE.

return_vacant
@@ -175,13 +175,13 @@

Value

Examples

-
if (FALSE) {
-get_pums(variables = "AGEP", state = "VT")
-get_pums(variables = "AGEP", state = "multiple", puma = c("UT" = 35008, "NV" = 00403))
-get_pums(variables = c("AGEP", "ANC1P"), state = "VT", recode = TRUE)
-get_pums(variables = "AGEP", state = "VT", survey = "acs1", rep_weights = "person")
-}
-
+    
if (FALSE) {
+get_pums(variables = "AGEP", state = "VT")
+get_pums(variables = "AGEP", state = "multiple", puma = c("UT" = 35008, "NV" = 00403))
+get_pums(variables = c("AGEP", "ANC1P"), state = "VT", recode = TRUE)
+get_pums(variables = "AGEP", state = "VT", survey = "acs1", rep_weights = "person")
+}
+
 
diff --git a/docs/reference/index.html b/docs/reference/index.html index a750546..42fce35 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
diff --git a/docs/reference/interpolate_pw.html b/docs/reference/interpolate_pw.html index 34947c2..3bb64e0 100644 --- a/docs/reference/interpolate_pw.html +++ b/docs/reference/interpolate_pw.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1 @@ -81,16 +81,16 @@

Use population-weighted interpolation to transfer information from one set o
-
interpolate_pw(
-  from,
-  to,
-  to_id = NULL,
-  extensive,
-  weights,
-  weight_column = NULL,
-  weight_placement = c("surface", "centroid"),
-  crs = NULL
-)
+
interpolate_pw(
+  from,
+  to,
+  to_id = NULL,
+  extensive,
+  weights,
+  weight_column = NULL,
+  weight_placement = c("surface", "centroid"),
+  crs = NULL
+)
@@ -142,50 +142,50 @@

Details

Examples

-
if (FALSE) {
-# Example: interpolating work-from-home from 2011-2015 ACS
-# to 2020 shapes
-library(tidycensus)
-library(tidyverse)
-library(tigris)
-options(tigris_use_cache = TRUE)
-
-wfh_15 <- get_acs(
-  geography = "tract",
-  variables = "B08006_017",
-  year = 2015,
-  state = "AZ",
-  county = "Maricopa",
-  geometry = TRUE
-) %>%
-select(estimate)
-
-wfh_20 <- get_acs(
-  geography = "tract",
-  variables = "B08006_017",
-  year = 2020,
-  state = "AZ",
-  county = "Maricopa",
-  geometry = TRUE
- )
-
-maricopa_blocks <- blocks(
-  "AZ",
-  "Maricopa",
-  year = 2020
-)
-
-wfh_15_to_20 <- interpolate_pw(
-  from = wfh_15,
-  to = wfh_20,
-  to_id = "GEOID",
-  weights = maricopa_blocks,
-  weight_column = "POP20",
-  crs = 26949,
-  extensive = TRUE
-)
-
-}
+    
if (FALSE) {
+# Example: interpolating work-from-home from 2011-2015 ACS
+# to 2020 shapes
+library(tidycensus)
+library(tidyverse)
+library(tigris)
+options(tigris_use_cache = TRUE)
+
+wfh_15 <- get_acs(
+  geography = "tract",
+  variables = "B08006_017",
+  year = 2015,
+  state = "AZ",
+  county = "Maricopa",
+  geometry = TRUE
+) %>%
+select(estimate)
+
+wfh_20 <- get_acs(
+  geography = "tract",
+  variables = "B08006_017",
+  year = 2020,
+  state = "AZ",
+  county = "Maricopa",
+  geometry = TRUE
+ )
+
+maricopa_blocks <- blocks(
+  "AZ",
+  "Maricopa",
+  year = 2020
+)
+
+wfh_15_to_20 <- interpolate_pw(
+  from = wfh_15,
+  to = wfh_20,
+  to_id = "GEOID",
+  weights = maricopa_blocks,
+  weight_column = "POP20",
+  crs = 26949,
+  extensive = TRUE
+)
+
+}
 
diff --git a/docs/reference/load_variables.html b/docs/reference/load_variables.html index 7334b31..391b4b1 100644 --- a/docs/reference/load_variables.html +++ b/docs/reference/load_variables.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,19 +81,19 @@

Load variables from a decennial Census or American Community Survey dataset
-
load_variables(
-  year,
-  dataset = c("sf1", "sf2", "sf3", "sf4", "pl", "dhc", "dp", "ddhca", "as", "gu", "mp",
-    "vi", "acsse", "dpas", "dpgu", "dpmp", "dpvi", "dhcvi", "dhcgu", "dhcvi", "dhcas",
-    "acs1", "acs3", "acs5", "acs1/profile", "acs3/profile", "acs5/profile",
-    "acs1/subject", "acs3/subject", "acs5/subject", "acs1/cprofile", "acs5/cprofile",
-    "sf2profile", "sf3profile", "sf4profile", "aian", "aianprofile", "cd110h", "cd110s",
-    "cd110hprofile", "cd110sprofile", "sldh", "slds", "sldhprofile", "sldsprofile",
-    "cqr", "cd113", "cd113profile", 
-     "cd115", "cd115profile", "cd116", "plnat",
-    "cd118"),
-  cache = FALSE
-)
+
load_variables(
+  year,
+  dataset = c("sf1", "sf2", "sf3", "sf4", "pl", "dhc", "dp", "ddhca", "as", "gu", "mp",
+    "vi", "acsse", "dpas", "dpgu", "dpmp", "dpvi", "dhcvi", "dhcgu", "dhcvi", "dhcas",
+    "acs1", "acs3", "acs5", "acs1/profile", "acs3/profile", "acs5/profile",
+    "acs1/subject", "acs3/subject", "acs5/subject", "acs1/cprofile", "acs5/cprofile",
+    "sf2profile", "sf3profile", "sf4profile", "aian", "aianprofile", "cd110h", "cd110s",
+    "cd110hprofile", "cd110sprofile", "sldh", "slds", "sldhprofile", "sldsprofile",
+    "cqr", "cd113", "cd113profile", 
+     "cd115", "cd115profile", "cd116", "plnat",
+    "cd118"),
+  cache = FALSE
+)
@@ -140,10 +140,10 @@

Details

Examples

-
if (FALSE) {
-v15 <- load_variables(2015, "acs5", cache = TRUE)
-View(v15)
-}
+    
if (FALSE) {
+v15 <- load_variables(2015, "acs5", cache = TRUE)
+View(v15)
+}
 
diff --git a/docs/reference/mig_recodes.html b/docs/reference/mig_recodes.html index ae3b9da..a214cd5 100644 --- a/docs/reference/mig_recodes.html +++ b/docs/reference/mig_recodes.html @@ -24,7 +24,7 @@ tidycensus - 1.5 + 1.6.1
@@ -92,7 +92,7 @@

Dataset with Migration Flows characteristic recodes

-
data(mig_recodes)
+
data(mig_recodes)
diff --git a/docs/reference/moe_product.html b/docs/reference/moe_product.html index d17a8f8..bdb56f4 100644 --- a/docs/reference/moe_product.html +++ b/docs/reference/moe_product.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,7 +81,7 @@

Calculate the margin of error for a derived product

-
moe_product(est1, est2, moe1, moe2)
+
moe_product(est1, est2, moe1, moe2)
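A quick illustrative call with hypothetical estimates and margins of error:

# MOE for the product of two estimates (values are made up for illustration)
moe_product(est1 = 0.45, est2 = 5200, moe1 = 0.05, moe2 = 320)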
diff --git a/docs/reference/moe_prop.html b/docs/reference/moe_prop.html index b5dfa40..993473b 100644 --- a/docs/reference/moe_prop.html +++ b/docs/reference/moe_prop.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,7 +81,7 @@

Calculate the margin of error for a derived proportion

-
moe_prop(num, denom, moe_num, moe_denom)
+
moe_prop(num, denom, moe_num, moe_denom)
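A quick illustrative call with hypothetical values; moe_prop() is intended for cases where the numerator is a subset of the denominator:

# MOE for an estimated proportion of 350 out of 1200 (hypothetical values)
moe_prop(num = 350, denom = 1200, moe_num = 75, moe_denom = 110)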
diff --git a/docs/reference/moe_ratio.html b/docs/reference/moe_ratio.html index 70efae2..6a1e401 100644 --- a/docs/reference/moe_ratio.html +++ b/docs/reference/moe_ratio.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,7 +81,7 @@

Calculate the margin of error for a derived ratio

-
moe_ratio(num, denom, moe_num, moe_denom)
+
moe_ratio(num, denom, moe_num, moe_denom)
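A quick illustrative call with hypothetical values; moe_ratio() is appropriate when the numerator is not a subset of the denominator:

# MOE for the ratio of two independent estimates (hypothetical values)
moe_ratio(num = 880, denom = 1200, moe_num = 90, moe_denom = 110)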
diff --git a/docs/reference/moe_sum.html b/docs/reference/moe_sum.html index ce43146..8bf7dec 100644 --- a/docs/reference/moe_sum.html +++ b/docs/reference/moe_sum.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,7 +81,7 @@

Calculate the margin of error for a derived sum

-
moe_sum(moe, estimate = NULL, na.rm = FALSE)
+
moe_sum(moe, estimate = NULL, na.rm = FALSE)
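A quick illustrative call with hypothetical values; supplying the estimates alongside the MOEs lets the function handle zero estimates as the Census Bureau recommends:

# MOE for the sum of three estimates (hypothetical values)
moe_sum(moe = c(55, 33, 44), estimate = c(1000, 1500, 0))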
diff --git a/docs/reference/pums_variables.html b/docs/reference/pums_variables.html index a44ecab..1591c7a 100644 --- a/docs/reference/pums_variables.html +++ b/docs/reference/pums_variables.html @@ -33,7 +33,7 @@ tidycensus - 1.5 + 1.6.1
@@ -110,12 +110,12 @@

Dataset with PUMS variables and codes

-
data(pums_variables)
+
data(pums_variables)

Format

-

An object of class tbl_df (inherits from tbl, data.frame) with 47803 rows and 12 columns.

+

An object of class tbl_df (inherits from tbl, data.frame) with 58576 rows and 12 columns.

Details

diff --git a/docs/reference/significance.html b/docs/reference/significance.html index 30c701e..2d316b9 100644 --- a/docs/reference/significance.html +++ b/docs/reference/significance.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,7 +81,7 @@

Evaluate whether the difference in two estimates is statistically significan
-
significance(est1, est2, moe1, moe2, clevel = 0.9)
+
significance(est1, est2, moe1, moe2, clevel = 0.9)
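A quick illustrative call with hypothetical values:

# Is the difference between the two estimates significant at the 90% level?
significance(est1 = 25000, est2 = 26500, moe1 = 3000, moe2 = 3500, clevel = 0.9)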
diff --git a/docs/reference/state_laea.html b/docs/reference/state_laea.html index c7056d3..fdcc53c 100644 --- a/docs/reference/state_laea.html +++ b/docs/reference/state_laea.html @@ -18,7 +18,7 @@ tidycensus - 1.5 + 1.6.1
@@ -83,9 +83,9 @@

State geometry with Alaska and Hawaii shifted and re-scaled

-
data(state_laea)
-
-data(state_laea)
+
data(state_laea)
+
+data(state_laea)
diff --git a/docs/reference/summary_files.html b/docs/reference/summary_files.html index 6a9eda0..ba7e98e 100644 --- a/docs/reference/summary_files.html +++ b/docs/reference/summary_files.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
@@ -81,7 +81,7 @@

Identify summary files for a given decennial Census year

-
summary_files(year)
+
summary_files(year)
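A quick illustrative call:

# List the decennial Census summary files tidycensus recognizes for 2020
summary_files(2020)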
diff --git a/docs/reference/tidycensus.html b/docs/reference/tidycensus.html index 092f036..aca9086 100644 --- a/docs/reference/tidycensus.html +++ b/docs/reference/tidycensus.html @@ -17,7 +17,7 @@ tidycensus - 1.5 + 1.6.1
diff --git a/docs/reference/to_survey.html b/docs/reference/to_survey.html index 28dd7bc..5c2305b 100644 --- a/docs/reference/to_survey.html +++ b/docs/reference/to_survey.html @@ -22,7 +22,7 @@ tidycensus - 1.5 + 1.6.1 @@ -91,12 +91,12 @@

Convert a data frame returned by get_pums() to a survey object

-
to_survey(
-  df,
-  type = c("person", "housing"),
-  class = c("srvyr", "survey"),
-  design = "rep_weights"
-)
+
to_survey(
+  df,
+  type = c("person", "housing"),
+  class = c("srvyr", "survey"),
+  design = "rep_weights"
+)
@@ -118,7 +118,7 @@

Arguments

design

The survey design to use when creating a survey object. -Currently the only option is code"rep_weights"/.

+Currently the only option is "rep_weights".

@@ -130,11 +130,11 @@

Value

Examples

-
if (FALSE) {
-pums <- get_pums(variables = "AGEP", state = "VT", rep_weights = "person")
-pums_design <- to_survey(pums, type = "person", class = "srvyr")
-survey::svymean(~AGEP, pums_design)
-}
+    
if (FALSE) {
+pums <- get_pums(variables = "AGEP", state = "VT", rep_weights = "person")
+pums_design <- to_survey(pums, type = "person", class = "srvyr")
+survey::svymean(~AGEP, pums_design)
+}