-
Notifications
You must be signed in to change notification settings - Fork 0
/
65-topic-disease.Rmd
113 lines (85 loc) · 4.51 KB
/
65-topic-disease.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# 肠道菌群与疾病 {#disease}
在这一章节,我们将分析最常见的十余种与肠道菌群相关的疾病研究的概况。
## 疾病类型概览
```{r}
# Keyword definitions, one entry per disease. The *_keywords objects are
# defined outside this chunk; the order here must match the label vectors below.
disease <- list(
  cancer_keywords,
  cardio_keywords,
  ibd_keywords,
  ibs_keywords,
  alzheimer_keywords,
  autism_keywords,
  hepatology_keywords,
  allergy_keywords,
  obesity_keywords,
  diabetes_keywords,
  asthma_keywords,
  diarrhea_keywords,
  constipation_keywords
)

# Short English label for each entry of `disease`, in the same order.
disease_name <- c(
  "cancer", "CVD", "IBD", "IBS", "Alzheimer's", "autism", "hepatology",
  "allergy", "obesity", "diabetes", "asthma", "diarrhea", "constipation"
)

# Chinese display label for each disease, named by its English label so it
# can be looked up with disease_name_cn[[name]].
disease_name_cn <- setNames(
  c(
    "肿瘤", "心血管疾病", "炎症性肠病", "肠易激综合征", "阿尔兹海默症",
    "自闭症", "肝病", "过敏", "肥胖", "糖尿病", "哮喘", "腹泻", "便秘"
  ),
  disease_name
)
```
一共有 `r length(disease_name)` 种不同的疾病被纳入了我们的分析之中。
它们分别是:`r paste0(disease_name_cn, collapse="、")` 等。
通过比较常见的与肠道菌群相关疾病的研究论文数量,可以得到最受关注的疾病类型分别是:
肿瘤、肥胖、炎症性肠病、心血管疾病、糖尿病等(图 \@ref(fig:disease-research-count))。
```{r}
# For every disease, keep the rows of MC_HC_article whose `content` matches
# at least one of that disease's primary keywords (case-insensitive), and
# convert the DT column to a factor.
disease_subtopic_core_article <- lapply(disease, function(keyword) {
  # Build the alternation regex once per disease. TRUE is spelled out because
  # the shorthand `T` is an ordinary variable that can be reassigned.
  primary_regex <- regex(
    paste0(keyword$primary, collapse = "|"),
    ignore_case = TRUE
  )
  MC_HC_article %>%
    filter(str_detect(content, primary_regex)) %>%
    mutate(DT = factor(DT))
})
names(disease_subtopic_core_article) <- disease_name

# Per disease: number of matched records for each value of PY
# (presumably the publication year -- confirm against the data source).
disease_subtopic_core_article_count <- lapply(
  disease_subtopic_core_article,
  function(article) {
    article %>%
      group_by(PY) %>%
      summarise(nRecord = n())
  }
)
```
```{r disease-research-count, fig.cap="过去20年在WoS数据库中收录的肠道菌群研究中,与10多种疾病相关的关键论文数量"}
# Number of matched articles per disease. vapply() guarantees exactly one
# integer per disease instead of relying on sapply()'s result simplification.
count <- vapply(disease_subtopic_core_article, nrow, integer(1))
count <- data.frame(disease = disease_name_cn, count = count)

# Sort by descending count, then freeze that order in the factor levels
# (reversed, so the largest bar ends up on top after coord_flip()).
count <- count %>%
  arrange(desc(count)) %>%
  mutate(disease = fct_rev(as_factor(as.character(disease))))

# Evenly spaced angles, one per disease; only needed by the polar variant
# that is currently commented out below.
count$angle <- seq_along(disease_name) * 360 / length(disease_name)

ggplot(count, aes(disease, count, fill = count)) +
  geom_col(width = 0.9) +
  # White column of height 2 drawn over the base of every bar.
  geom_col(aes(y = I(2)), width = 1, fill = "white") +
  # Bars with at least 3000 articles get a white label inside the bar ...
  geom_text(
    aes(label = disease, y = count * 0.85),
    color = "white", vjust = 0.5, size = 3, fontface = "bold",
    # `>=` so that a count of exactly 3000 is labelled too; the original
    # `>` / `<` pair left that value without any label.
    data = function(d) d[d$count >= 3000, ]
  ) +
  # ... and shorter bars get a black label just past the end of the bar.
  geom_text(
    aes(label = disease, y = count * 1.02),
    color = "black", vjust = 0, hjust = 0, size = 3, fontface = "bold",
    data = function(d) d[d$count < 3000, ]
  ) +
  annotate(
    geom = "text", y = 800, x = 1, label = "数据分析:热心肠研究院",
    color = "grey", size = 3, hjust = 0.9, vjust = 1
  ) +
  labs(x = "", y = "", title = "") +
  coord_flip() +
  scale_fill_gradientn(
    colors = c("darkgreen", "green", "orange", "firebrick", "red"),
    trans = "sqrt"
  ) +
  # coord_polar(direction = -1) +
  # theme_void() +
  theme(
    legend.position = "none",
    axis.text.y = element_blank()
  )
# graph2ppt()
# count
```
这10多种疾病之间的联系,可以从图 \@ref(fig:disease-overlap) 中窥其一斑。
```{r disease-overlap, fig.cap="10多种疾病之间的重叠度"}
# Overlap between the article sets of the different diseases.
# Each set is the SR column of one disease's matched articles
# (SR is presumably a per-article reference key -- confirm).
SR <- lapply(disease_subtopic_core_article, function(d) d %>% pull(SR))
names(SR) <- disease_name_cn
library(UpSetR)
# The overlap turns out to be fairly low.
# FALSE is spelled out because the shorthand `F` can be reassigned.
upset(
  fromList(SR),
  keep.order = FALSE,
  sets = disease_name_cn,
  mb.ratio = c(0.5, 0.5)
)
```
- 肿瘤与炎症性肠病之间的关系最为密切,或可佐证长期发炎可致癌的观点;
- 肥胖和糖尿病之间的关系也很密切,两种疾病相伴相生诚不欺我;
- 肥胖还与肿瘤有关,说明胖真的是坏处多多;
- 过敏和哮喘也是关系密切,本质上都属于免疫系统过于敏感造成的疾病;
- 等等等等。
```{r}
# Extract the articles shown in one disease section.
#
# name: English disease label; used to index both `disease_name_cn` and
#       `disease_subtopic_core_article`.
# Returns a list with
#   widget - the result of produce_topic_article_datatable() for the articles
#   data   - the data frame that was passed to it (rows where CORE or HC is TRUE)
disease_dt_output <- function(name) {
  # The same text is used as the table caption and as the filename argument.
  caption_text <- paste(disease_name_cn[[name]], "相关的关键论文")

  key_articles <- filter(
    biblio_df(disease_subtopic_core_article[[name]]),
    CORE == TRUE | HC == TRUE
  )

  table_widget <- produce_topic_article_datatable(
    key_articles,
    caption = caption_text,
    filename = caption_text
  )

  list(widget = table_widget, data = key_articles)
}
# Four-dimension analysis of a batch of articles: countries, institutions,
# people and journals.
#
# M:    the article collection handed to tableTag_barplot().
# tags: the tags to plot, one panel per tag (top 10 entries each).
# Returns the panels combined by plot_grid(), labelled automatically.
four_dimension_barplot <- function(M, tags = c("AU_CO_NR", "AU_UN_NR", "AU", "J9")) {
  # One bar plot for a single tag, with an empty title and the y-scale
  # expansion set to c(0, 1).
  make_panel <- function(tag) {
    panel <- tableTag_barplot(M, Tag = tag, n = 10)
    panel + labs(title = "") + scale_y_continuous(expand = c(0, 1))
  }

  panels <- lapply(tags, make_panel)
  plot_grid(plotlist = panels, labels = "AUTO")
}
```