-
Notifications
You must be signed in to change notification settings - Fork 0
/
18-anonymize.R
39 lines (37 loc) · 1.09 KB
/
18-anonymize.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
library(tidyverse)
# Anonymise the cleaned dataset ----
#
# Reads data/uu_clean_data.rds, drops the directly identifying columns,
# replaces `dni` + `nombres` with a short hash (`hash_short`, placed as the
# first column) and writes the result to data/uu_clean_data_hash.rds.
# tictoc::tic()/toc() report the elapsed time of the whole step.
#
# NOTE(review): no `salt` is passed to hash_names(), so the hash presumably
# depends only on dni + nombres -- confirm whether a secret salt is required.
# NOTE(review): rows with missing dni/nombres presumably all receive the same
# hash -- verify before relying on hash_short as a per-person key.
tictoc::tic()
read_rds("data/uu_clean_data.rds") %>%
  # Remove identifiers that are not needed to build the hash.
  select(
    -tipo_doc, -otro_dni,
    -(apellido_paterno:otro_telefono),
    -nombre_completo,
    -username,
    -fecha_nacimiento,
    -contains("gps"), -latitud, -longitud
  ) %>%
  # hash_names() accepts whole vectors, so hash both columns in one call
  # instead of one call per row. `full = FALSE` returns only the short hash,
  # so the clear-text label and the full hash never enter the table.
  # unname() guards against the labels being carried along as vector names.
  # Use hashfun = "fast" for a quicker, weaker hash.
  mutate(
    hash_short = unname(
      epitrix::hash_names(dni, nombres, hashfun = "secure", full = FALSE)
    )
  ) %>%
  # The source identifiers are no longer needed once hashed.
  select(-dni, -nombres) %>%
  select(hash_short, everything()) %>%
  write_rds("data/uu_clean_data_hash.rds")
tictoc::toc()
# 95.83 sec elapsed (measured with the earlier row-by-row pmap() version)
# Quick check of the written file: read it back and show its distinct rows.
# Alternative interactive checks (apply to the same read_rds() result):
#   count(username) %>% avallecam::print_inf()
#   count(hash_short) %>% arrange(desc(n))
#   glimpse()
distinct(read_rds("data/uu_clean_data_hash.rds"))