-
Notifications
You must be signed in to change notification settings - Fork 0
/
01_source_me.R
360 lines (298 loc) · 13.9 KB
/
01_source_me.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
tictoc::tic()
#libraries------------
library("data.table")
library("tidytable")
library("lubridate")
library("here")
library("readxl")
library("readr")
library("stringr")
library("wrapR") # devtools::install_github("bcgov/wrapR")
library("janitor")
#constants------------
round_small <- 2 #round to 2 digits
round_medium <- -1 #round to nearest 10
round_large <- -3 #round to nearest 1000
# Functions------------------
source(here::here("R","functions.R"))
#read in dataframes--------------
ed_back <- read_excel(here("raw_data","!! LMO 2022 Edition Job openings by education type shares.xlsx"),range = "A4:K505")%>%
clean_names()%>%
filter(noc!="#T")%>%
select(-starts_with("paste"), -starts_with("noc_2016"), -description)
jo_raw <- vroom::vroom(here("raw_data",
list.files(here("raw_data"), pattern = "JO", ignore.case = FALSE)),
locale = readr::locale(encoding = "latin1"),
skip=3,
col_select = -1)
employment_raw <- vroom::vroom(here("raw_data",
list.files(here("raw_data"), pattern = "Emp")),
locale = readr::locale(encoding = "latin1"),
skip=3,
col_select = -1)
ds_raw <- vroom::vroom(here("raw_data",
list.files(here("raw_data"), pattern = "DS")),
locale = readr::locale(encoding = "latin1"),
skip=3,
col_select = -1)
noc_mapping <- vroom::vroom(here::here("raw_data","noc_mapping.csv"))
industry_mapping <- vroom::vroom(here::here("raw_data","industry_to_agg_mapping.csv"), delim=",")%>%
distinct()
typical_education <- read_excel(here::here("raw_data","Occupational Characteristics based on LMO 2022E 2022-Aug.xlsx"), skip=3, sheet = "Characteristics")%>%
janitor::clean_names()%>%
select(noc=noc_2016, Typical_Education=typical_education_background_2022_editon)
wages <- read_excel(here::here("raw_data","2021 Wages.xlsx"))%>%
janitor::clean_names()%>%
rename(noc=noc_2016)%>%
mutate(across(contains("wage"), ~if_else(.x > 3000, round(.x/1730,2), .x)))%>% #converts annual salary to hourly wage rate
mutate(across(contains("wage"), ~ na_if(., 0))) #replaces 0 wage with NA
interests <- read_excel(here::here("raw_data","Occupational Characteristics based on LMO 2022E 2022-Aug.xlsx"),
sheet="Characteristics",
skip=3)%>%
janitor::clean_names()%>%
select(noc=noc_2016,
interests)
whos_hoo <- read_excel(here::here("raw_data","HOO list 2022E.xlsx"))%>%
janitor::clean_names()%>%
mutate(hoo = if_else(high_opportunity_occupation=="Yes", TRUE, FALSE))%>%
select(-high_opportunity_occupation)
new_supply <- read_excel(here::here("raw_data","LMO-supply-composition-output-total10yr FINAL.xlsx"), skip=2)%>%
janitor::clean_names()%>%
select(-x1, -year)%>%
filter(noc!="#T")%>%
left_join(noc_mapping)%>%
select(-noc, -occupation_title)%>%
fix_col_names()%>%
camel_to_title()%>%
mutate(across(where(is.numeric), ~round(., round_medium)))%>%
select(-`Decline In Unemployment`)
new_supply%>%
write_csv(here::here("shiny_data","new_supply.csv")) #for new supply table
# we need current_year ASAP, so this code is ahead of where it is used in dashboard--------
#'jo_raw and emp_raw tibbles are wide and contain unwanted aggregates.
#' e.g. when we are breaking down by noc we want it to be for all industries and drop the aggregate NOC.
#' when we are breaking down by industry we want it to be for all NOC, and drop the all industry aggregate.
long_by_noc <- bind_long_by(jo_raw, employment_raw, "noc")%>%
select(-industry)
long_by_industry <- bind_long_by(jo_raw, employment_raw, "industry")%>%
select(-noc,-description)
current_year <- min(long_by_noc$date)
#for the job openings by education pie chart--------------------
jo_by_ed <- jo_raw%>%
filter(Variable == "Job Openings",
`Geographic Area` == "British Columbia",
Industry == "All industries",
NOC != "#T")%>%
select(-Variable, -`Geographic Area`, -Industry)%>%
pivot_longer(cols=-c(NOC,Description), names_to = "year")%>%
filter(year>min(year))%>%
clean_names()%>%
group_by(noc, description)%>%
summarize(value=sum(value))%>%
full_join(ed_back)%>%
mutate(across(c(no_certificate_diploma_or_degree,
secondary_high_school_diploma_or_equivalency_certificate,
postsecondary_certificate_or_diploma_below_bachelor_level,
apprenticeship_certificate,
diploma_certificate_excluding_apprenticeship,
university_certificate_diploma_or_degree_at_bachelor_level_or_above), ~ .x*value))%>%
adorn_totals()%>%
select(-contains("below"))%>%
filter(noc=="Total")%>%
ungroup()%>%
select(-noc, -description,-value)%>%
pivot_longer(cols=everything())%>%
mutate(value=round(value,-2))%>%
rename(`Job Openings`=value,
Typical_Education=name)%>%
wrapR::camel_to_title()%>%
mutate(Typical_Education=str_replace(Typical_Education, "No Certificate Diploma Or Degree", "Less than high school"),
Typical_Education=str_replace(Typical_Education, "Secondary High School Diploma Or Equivalency Certificate", "High school and/or occupation specific training"),
Typical_Education=str_replace(Typical_Education, "University Certificate Diploma Or Degree At Bachelor Level Or Above", "Bachelor's, graduate or professional degree")
)
write_csv(jo_by_ed, here("shiny_data","jo_by_ed.csv"))
# #ds_and_jo----------------
#' Highlights page 1 and Annual outlook are based on all_industry, all_NOC aggregates.
jo_total_noc <- jo_raw%>%
filter(Industry=="All industries",
NOC=="#T")%>%
select(-Industry)%>%
pivot_longer(cols=-c(NOC, Description, Variable, `Geographic Area`),
names_to = "date",
values_to = "value")%>%
mutate(date=as.numeric(date))%>%
clean_tbbl()%>%
filter(date>current_year)%>%
filter(variable %in% c("job_openings", "expansion_demand","replacement_demand"))%>%
pivot_wider(names_from = variable, values_from = value)%>%
select(-noc,-description)%>%
pivot_longer(cols=-c(date, geographic_area))
ds_total_noc <- ds_raw%>%
pivot_longer(cols=-c(NOC, Description, Industry, Variable, `Geographic Area`),
names_to = "date",
values_to = "value")%>%
clean_tbbl()%>%
mutate(date=as.numeric(as.character(date)))%>%
filter(date>current_year,
noc=="#t", #ds_raw contains ONLY the "all_industry" aggregate so no need to filter out industries
variable %in% c("deaths",
"retirements",
"new_entrants",
"net_international_in_migration",
"net_interregional_in_migration"))%>%
pivot_wider(names_from = variable, values_from = value)%>%
rename(young_people_starting_work=new_entrants,
immigrants=`net_international_in_migration`,
migrants_from_other_provinces=`net_interregional_in_migration`)%>%
select(-noc, -description, -industry)%>%
pivot_longer(cols=-c(date, geographic_area))
ds_and_jo <- bind_rows(jo_total_noc, ds_total_noc)%>%
pivot_wider(names_from = name, values_from = value)%>%
mutate(additional_supply_requirement = job_openings - immigrants - migrants_from_other_provinces - young_people_starting_work,
labour_force_exits = -1 * (deaths + retirements),
total_supply_additions = young_people_starting_work+ immigrants + migrants_from_other_provinces+ additional_supply_requirement
)%>%
pivot_longer(cols=-c(date, geographic_area))
ds_and_jo%>%
camel_to_title()%>%
write_csv(here::here("shiny_data","ds_and_jo.csv")) #annual outlook page: everything except bottom left donut.
#Highlights Part 1 and annual outlook page (bottom left donut)------------------
jo_total <- jo_total_noc%>%
filter(geographic_area=="british_columbia",
name=="job_openings",
date>current_year)%>%
summarize(value=round(sum(value), round_large))%>%
pull(value)
jo_tab <- ds_and_jo%>%
filter(name %in% c("job_openings","expansion_demand","replacement_demand"),
geographic_area=="british_columbia")%>%
group_by(name)%>%
summarize(value=round(sum(value), round_large))%>%
arrange(desc(value))%>%
mutate(percent=scales::percent(2*value/sum(value)),
name=str_to_title(str_replace_all(name,"_"," "))
)
ds_tab <- ds_and_jo%>%
filter(name %in% c("total_supply_additions",
"young_people_starting_work",
"immigrants",
"migrants_from_other_provinces",
"additional_supply_requirement"),
geographic_area=="british_columbia")%>%
group_by(name)%>%
summarize(value=round(sum(value), round_large))%>%
arrange(desc(value))%>%
mutate(percent=scales::percent(value/jo_total, accuracy = 1),
name=str_to_title(str_replace_all(name,"_"," "))
)
ds_and_jo_tab <- bind_rows(jo_tab, ds_tab)
ds_and_jo_tab%>%
write_csv(here::here("shiny_data","ds_and_jo_tab.csv")) #Highlights Part 1 and annual outlook page (bottom left donut)
#regional page------------
#' Highlights page 1 and Annual outlook are based on all_industry, all_NOC aggregates.
emp_total_noc <- employment_raw%>%
filter(Industry=="All industries",
NOC=="#T")%>%
select(-Industry)%>%
pivot_longer(cols=-c(NOC, Description, Variable, `Geographic Area`),
names_to = "date",
values_to = "value")%>%
mutate(date=as.numeric(date))%>%
clean_tbbl()%>%
select(-noc,-description)%>%
group_by(geographic_area)%>%
nest()%>%
mutate(current_employment=map_dbl(data, get_current, "employment"),
employment_growth=map_dbl(data, get_ten_cagr)
)%>%
select(-data)%>%
pivot_longer(cols=-geographic_area)
regional <- jo_total_noc%>%
filter(name %in% c("expansion_demand", "job_openings", "replacement_demand"))%>%
group_by(geographic_area, name)%>%
summarize(value=sum(value))%>%
bind_rows(emp_total_noc)%>%
bind_rows(bc_reg_pop())%>%
pivot_wider()%>%
mutate(employment_growth=employment_growth/100)%>%
mutate(across(c(expansion_demand, replacement_demand, current_employment), ~ .x/population, .names = "{.col}_per_capita"))%>%
select(-population)%>%
pivot_longer(cols=-geographic_area)
write_csv(regional, here::here("shiny_data","regional.csv"))#for regional page
# industry outlook-------------
#' breakdown for aggregate_industry and industry, for aggregate NOC (NOC="#T")
long_and_industry_mapping <- long_by_industry%>%
full_join(industry_mapping)
by_aggregate <- group_nest_agg(long_and_industry_mapping, aggregate_industry)%>%
na.omit()%>%
mutate(industry=NA)
by_industry <- group_nest_agg(long_and_industry_mapping, industry)%>%
left_join(industry_mapping, multiple = "all")%>%
na.omit()
industry_outlook <- bind_rows(by_aggregate, by_industry)%>%
get_measures()
write_csv(industry_outlook, here::here("shiny_data","industry_outlook.csv")) #for industry outlook plot and table
#occupation outlook plot-----------------
#breakdown for NOC1, NOC2, NOC3, for aggregate industry (industry="all_industries)
long_and_noc_mapping <- long_by_noc%>%
full_join(noc_mapping)
two_to_one <- unique(noc_mapping[c("noc1","noc2")])
three_to_one <- unique(noc_mapping[c("noc1","noc2","noc3")])
occupation_outlook_no_ed <- aggregate_by_noc(long_and_noc_mapping)
occupation_outlook <- occupation_outlook_no_ed%>%
left_join(typical_education)%>%
select(-noc)
write_csv(occupation_outlook, here::here("shiny_data","occupation_outlook.csv")) #for occupation outlook plot and highlights part 1 pie.
#occupation outlook table------------------------
#' this was tricky... typical education only provided at 4 digit level... So we partition the data by typical
#' education and then do the aggregation NOC1,NOC2,NOC3 for each of the typical education levels (again for aggregate industry).
#' also input to JO_500 and HOO.
occupation_outlook_table <- long_and_noc_mapping%>%
left_join(typical_education)%>%
group_by(`Typical_Education`)%>%
nest()%>%
mutate(data=map(data, aggregate_by_noc))%>%
unnest(data)
write_csv(occupation_outlook_table, here::here("shiny_data","occupation_outlook_table.csv"))#for occupation outlook table
# Job openings 500 occupations and HOO------------
wages_and_interests <- wages%>%
left_join(noc_mapping)%>%
left_join(typical_education)%>%
group_by(`Typical_Education`)%>%
nest()%>%
mutate(data=map(data, get_mean_wages))%>%
unnest(data)%>%
mutate(number_noc=word(noc, 1, sep = "_"))%>%
left_join(interests, by=c("number_noc"="noc"))%>%
mutate(noc = case_when(is.na(noc) & is.na(NOC4) & is.na(NOC3) & is.na(NOC2) ~NOC1,
is.na(noc) & is.na(NOC4) & is.na(NOC3) ~ NOC2,
is.na(noc) & is.na(NOC4) ~ NOC3,
TRUE ~ NOC4),
noc = word(noc, 1, sep = "_"))%>%
select(-number_noc)
just_jo <- occupation_outlook_table%>%
filter(name=="Job Openings")%>%
mutate(noc = case_when(is.na(noc) & is.na(NOC4) & is.na(NOC3) & is.na(NOC2) ~NOC1,
is.na(noc) & is.na(NOC4) & is.na(NOC3) ~ NOC2,
is.na(noc) & is.na(NOC4) ~ NOC3,
TRUE ~ NOC4),
noc = word(noc, 1, sep = " "))
jo_500 <- just_jo%>%
left_join(wages_and_interests, by=c("noc"="noc","Typical_Education"="Typical_Education"))%>%
select(-ends_with(".y"))%>%
rename(NOC1=NOC1.x,
NOC2=NOC2.x,
NOC3=NOC3.x,
NOC4=NOC4.x)%>%
left_join(whos_hoo,
by=c("noc"="noc",
"Geographic_Area"="geographic_area")
)%>%
select(-noc)
write_csv(jo_500, here::here("shiny_data","jo_500.csv")) #for jo_500 and HOO tables
# document objects for 03_knit_me.Rmd----------
df_names = ls()[sapply(ls(), function(x) any(is.data.frame(get(x))))]
dfs <- tibble::tibble(df_names)%>%
mutate(info = map(df_names, col_names_type_example))
saveRDS(dfs, "dataframes.RDS")
tictoc::toc()