-
Notifications
You must be signed in to change notification settings - Fork 0
/
lda_testing.R
42 lines (32 loc) · 895 Bytes
/
lda_testing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
library(topicmodels)
data("AssociatedPress")
ap_lda <- LDA(AssociatedPress, k = 2, control = list(seed = 1234))
ap_lda
library(tidytext)
ap_topics <- tidy(ap_lda, matrix = "beta")
ap_topics
library(ggplot2)
library(dplyr)
ap_top_terms <- ap_topics %>%
group_by(topic) %>%
top_n(10, beta) %>%
ungroup() %>%
arrange(topic, -beta)
ap_top_terms %>%
mutate(term = reorder(term, beta)) %>%
ggplot(aes(term, beta, fill = factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
coord_flip()
library(tidyr)
beta_spread <- ap_topics %>%
mutate(topic = paste0("topic", topic)) %>%
spread(topic, beta) %>%
filter(topic1 > .001 | topic2 > .001) %>%
mutate(log_ratio = log2(topic2 / topic1))
beta_spread
ap_documents <- tidy(ap_lda, matrix = "gamma")
ap_documents
tidy(AssociatedPress) %>%
filter(document == 6) %>%
arrange(desc(count))