-
Notifications
You must be signed in to change notification settings - Fork 0
/
Aim2.R
81 lines (60 loc) · 2.52 KB
/
Aim2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Setup -------------------------------------------------------------------
# Project helpers. This runs before setwd() below, so "utils.R" is resolved
# against the directory the script is launched from.
source("utils.R")
library(tidyverse)
library(devtools)
# ggbiplot is installed from GitHub. Only install it when it is missing so
# that a normal run does not contact GitHub and attempt a reinstall.
if (!requireNamespace("ggbiplot", quietly = TRUE)) {
  install_github("vqv/ggbiplot")
}
library(ggbiplot)
# NOTE(review): hard-coded working directory. Every relative path used after
# this point (clean_data/, figures/) is resolved against it.
setwd("~/work")
# Aim 2 -------------------------------------------------------------------
# Do neighborhoods with higher rates of people with disabilities
# have less access to mobility?

# Load the cleaned dataset (path is relative to the working directory).
clean_data <- read.csv("clean_data/clean_data.csv")

# Keep the outcome (disabled_population) plus the four access measures that
# the rest of this aim works with.
access_factors <- clean_data %>%
  select(
    disabled_population,
    subway,
    park,
    car_free_commute,
    mean_travel_time_to_work
  )

# Reshape to long form (one row per observation per access measure) so that
# each measure gets its own facet, then plot it against disabled_population.
# pivot_longer() replaces the superseded gather().
access_exploratory <- access_factors %>%
  pivot_longer(
    cols = -disabled_population,
    names_to = "var",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value, y = disabled_population)) +
  geom_point() +
  stat_smooth() +
  # scales = "free" lets every facet use its own axis ranges.
  facet_wrap(~ var, scales = "free") +
  labs(title = "Figure 2a: Exploratory Plot of Access Factors and % Disabled Population")

ggsave("figures/access_factors.png", access_exploratory)
# Places with more disabled populations don't have better access to
# transportation or parks.
# PCA ---------------------------------------------------------------------
# Principal component analysis of disabled_population together with the four
# access measures. Columns are centered and scaled to unit variance first.
access.pca <- prcomp(access_factors, center = TRUE, scale. = TRUE)
summary(access.pca)

# Biplot of the PCA with observations grouped (and ellipses drawn) by
# borough. The title is set once, through labs(title = ...).
access.pca.plot <-
  ggbiplot(
    access.pca,
    ellipse = TRUE,
    obs.scale = 1,
    var.scale = 1,
    groups = clean_data$boro_names
  ) +
  theme_minimal() +
  labs(title = "Figure 2b: PCA Plot of Access Factors, Grouped by Boro")

ggsave("figures/access_pca.png", access.pca.plot)
# Let's remove disabled population from the PCA and use it as the grouping
# variable instead of boro.
# Neighborhoods are split into quartiles of disabled_population; the two
# middle quartiles share one label, which gives three groups: lowest quartile,
# middle 50%, and highest quartile.
# dplyr::mutate is namespaced explicitly because packages attached after
# tidyverse (e.g. plyr, which ggbiplot may pull in) can mask mutate().
cat_dis <- access_factors %>%
  dplyr::mutate(
    quartile = factor(
      ntile(disabled_population, 4),
      levels = 1:4,
      # Repeating a label merges levels 2 and 3 into a single factor level.
      labels = c(
        "Lowest % Disabled",
        "Middle 50%",
        "Middle 50%",
        "Highest % Disabled"
      )
    )
  )

# Access measures only; disabled_population is left out of this PCA.
access_factors_nodis <- clean_data %>%
  select(
    subway,
    park,
    car_free_commute,
    mean_travel_time_to_work
  )

access.pca.nodis <- prcomp(access_factors_nodis, center = TRUE, scale. = TRUE)
# Summarise the model fitted just above (not the earlier access.pca fit).
summary(access.pca.nodis)

# cat_dis and access_factors_nodis both come from clean_data without any row
# filtering, so cat_dis$quartile lines up row-for-row with the PCA scores.
access.dis.pca.plot <-
  ggbiplot(
    access.pca.nodis,
    ellipse = TRUE,
    obs.scale = 1,
    var.scale = 1,
    groups = cat_dis$quartile
  ) +
  theme(legend.position = "bottom") +
  labs(title = "Figure 2c: PCA Plot of Access Factors, Grouped by % of Population Disabled")

ggsave("figures/access_dis_pca.png", access.dis.pca.plot)