-
Notifications
You must be signed in to change notification settings - Fork 0
/
multiMiR script
65 lines (65 loc) · 2.42 KB
/
multiMiR script
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# edgeR analysis
library("edgeR")
library(multiMiR)
library(dplyr)
library(writexl)
rm(list=ls())
BioCC_input = read.table("/path/to/featCounts.txt",sep='\t',header=TRUE)
head(BioCC_input)
rownames(BioCC_input) <- BioCC_input[,1]
BioCC_input[,1:6] <- NULL
head(BioCC_input)
ls()
BioCC_input_zero_filt <- BioCC_input[rowSums(BioCC_input == 0) <= 6,]
Medians = numeric()
for (i in 1:nrow(BioCC_input_zero_filt)){
Medians[i] <- median(as.numeric(BioCC_input_zero_filt[i,]))
}
BioCC_final <- BioCC_input_zero_filt[Medians>=1,]
rpk <- BioCC_final
dim(rpk)
head(rpk)
# Group files by either read or control
Groups <- as.factor(c(rep("read",2),rep("Ctrl",1)))
Groups
# Using trended dispersions
rpk.norm.g <- DGEList(counts=rpk,group=Groups)
rpk.norm.g <- calcNormFactors(rpk.norm.g)
norm.counts.rpk.g <- cpm(rpk.norm.g)
head(norm.counts.rpk.g)
dim(norm.counts.rpk.g)
plotMDS(rpk.norm.g,top=nrow(norm.counts.rpk.g),gene.selection = "common")
[20]: plotMDS(rpk.norm.g,top=500,gene.selection = "common")
plotMDS(rpk.norm.g,top=500,gene.selection = "common")
design.mat = model.matrix(~ 0 + Groups)
design.mat
rpk.norm.g <- estimateDisp(rpk.norm.g,design.mat)
names(rpk.norm.g)
mean(rpk.norm.g$tagwise.dispersion)
plotBCV(rpk.norm.g)
# Fit GLM model
fit <- glmFit(rpk.norm.g,design.mat)
Ctrlvread <- makeContrasts(GroupsCtrl-Groupsread,levels=design.mat)
readvCtrl <- makeContrasts(Groupsread-GroupsCtrl,levels=design.mat)
read_lrt <- glmLRT(fit,contrast = readvCtrl)
# Table of unadjusted p-values (PValue) and FDR values
DGE_read_lrt <- topTags(read_lrt,n=Inf,adjust.method = "BH")
# Getting top differentially expressed miRNAs
top_miRNAs <- rownames(DGE_read_lrt$table)[1:10]
top_miRNAs
# Plug miRNAs into multiMiR and get validated targets
multimir_results <- get_multimir(org = 'hsa',
mirna = top_miRNAs,
table = 'validated',
summary = TRUE)
# Parse multimir_results for unique miRNAs
multimir_results_data <- as.data.frame(multimir_results@data)
unique_top10mirnas <- multimir_results@data %>% distinct(mature_mirna_id, .keep_all = TRUE)
as.data.frame(unique_top10mirnas)
write_xlsx(unique_top10mirnas, "/path/to/unique_top10mirnas")
# Display first 10 hits
head(multimir_results@data, n=10)
# Print top 100 hits
print.data.frame(multimir_results@data)
# To convert dispersion plot to csv for GO Ontology
write.csv(multimir_results@data, "/path/to/multimir_dispersion.csv")