-
Notifications
You must be signed in to change notification settings - Fork 4
/
visualisation of correlation matrix.R
73 lines (57 loc) · 1.65 KB
/
visualisation of correlation matrix.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# ---- Packages ----
# Install each package only when it is missing, instead of reinstalling on
# every run. The package name must be a quoted string: the bare symbol
# `corrplot` is not a defined object and makes install.packages() fail.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
library(dplyr)
library(corrplot)

# ---- Load and inspect the data ----
# NOTE(review): machine-specific working directory; the relative path passed
# to read.csv() below is resolved against it.
setwd("C:/Users/siamak/Desktop/Project")
dir()
data <- read.csv("combined.csv")
View(data)

# The first thing that you should do is check the class of your data frame:
class(data)
dim(data)
summary(data)
glimpse(data)

# Number of rows that contain at least one missing value.
sum(!complete.cases(data))

# ---- Build the analysis data frame ----
# Columns 27 to 46 of the raw data plus three further columns taken by name.
data2 <- data[, 27:46]
data1 <- data.frame(data2, data$BUDGET, data$Tot_Population, data$Unemploy)

# Keep the rows whose One_method_Train_P value is not the literal string "na".
# The condition is evaluated on `data`, which has the same number of rows as
# `data1`, so the two stay row-aligned.
df <- filter(data1, data$One_method_Train_P != "na")
glimpse(df)

# ---- Visualize correlation matrix using correlogram ----
M <- cor(df)
head(round(M, 2))
corrplot(M, type = "upper", order = "hclust", tl.col = "black")
# Compute the matrix of p-values for every pairwise correlation test between
# the columns of `mat`.
#
# Arguments:
#   mat  A matrix, or an object that as.matrix() can convert, whose columns
#        are the variables to test against each other.
#   ...  Further arguments passed on to the native R cor.test function.
#
# Returns:
#   A square matrix with one row and one column per column of `mat`. The
#   diagonal is 0, entry [i, j] is the p-value of cor.test() on columns i and
#   j, and the row and column names are the column names of `mat`.
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- matrix(NA_real_, n, n)
  diag(p.mat) <- 0
  # seq_len() yields an empty sequence when there are fewer than two columns;
  # 1:(n - 1) would count down (1, 0) and index a non-existent column.
  for (i in seq_len(max(n - 1L, 0L))) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], ...)
      # The test is symmetric in its two inputs, so fill both triangles.
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
# Matrix of the p-values of the pairwise correlation tests on df.
p.mat <- cor.mtest(df)
head(p.mat[, 1:5])

# Upper-triangle correlogram, ordered by hierarchical clustering, with the
# p-values and a 0.01 significance level handed to corrplot.
corrplot(
  M,
  type = "upper",
  order = "hclust",
  p.mat = p.mat,
  sig.level = 0.01
)

# Five-stop colour ramp; col(300) below expands it to 300 colours.
col <- colorRampPalette(
  c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
)

# Colour-tile correlogram with black coefficient and label text, the same
# p-value inputs, and the diagonal left out.
corrplot(
  M,
  method = "color",
  col = col(300),
  type = "upper",
  order = "hclust",
  addCoef.col = "black",
  tl.col = "black",
  p.mat = p.mat,
  sig.level = 0.01,
  diag = FALSE
)