-
Notifications
You must be signed in to change notification settings - Fork 0
/
xgboost.R
62 lines (46 loc) · 1.7 KB
/
xgboost.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
## xgboost: credit card fraud classification ----
library(xgboost)
library(dplyr)
library(magrittr)
library(Matrix)

# Data prep ----
# NOTE(review): hard-coded Windows path -- adjust for your machine.
creditcard <- read.csv("D:/creditcard.csv")

# Quick exploratory look at the data.
head(creditcard, n = 10)
summary(creditcard$Class)
str(creditcard)
summary(creditcard)

# Number of fraud cases (Class == 1); na.rm keeps parity with
# length(which(...)), which silently drops NA.
sum(creditcard$Class == 1, na.rm = TRUE)
# Data partitioning ----
# Reproducible 70/30 train/validation split (seed fixed).
set.seed(100)
index <- sample(seq_len(nrow(creditcard)), size = floor(nrow(creditcard) * 0.7))
training <- creditcard[index, ]
validation <- creditcard[-index, ]

# Build xgb.DMatrix objects; features = every column except "Class".
# Note: the original comment said "one hot encoding", but no encoding is
# performed here -- all predictors are already numeric.
train_label <- training[, "Class"]
xgb.data.train <- xgb.DMatrix(
  as.matrix(training[, colnames(training) != "Class"]),
  label = train_label
)
test_label <- validation[, "Class"]
xgb.data.test <- xgb.DMatrix(
  as.matrix(validation[, colnames(validation) != "Class"]),
  label = test_label
)
# Parameters ----
# BUG FIX: the original passed "numclass" = nc. The actual xgboost parameter
# is spelled num_class, and it applies only to the multiclass objectives
# (multi:softmax / multi:softprob); it must not be set with binary:logistic,
# so it is dropped here (and the now-unused class count with it).
xgb_params <- list(
  objective = "binary:logistic",  # binary classifier, probability output
  eval_metric = "auc"             # AUC reported for both watchlist sets
)
watchlist <- list(train = xgb.data.train, test = xgb.data.test)

## eXtreme gradient boosting model ----
# NOTE(review): 500 rounds with no early_stopping_rounds may overfit;
# consider adding early stopping against the test set.
bst_model <- xgb.train(
  params = xgb_params,
  data = xgb.data.train,
  nrounds = 500,
  watchlist = watchlist
)
## Training and test AUC per boosting round ----
e <- data.frame(bst_model$evaluation_log)
plot(e$iter, e$train_auc, col = "blue", type = "l",
     xlab = "iteration", ylab = "AUC")
# Also draw the held-out AUC: the watchlist logs both, and the gap between
# the two curves is what reveals overfitting.
lines(e$iter, e$test_auc, col = "red")
legend("bottomright", legend = c("train", "test"),
       col = c("blue", "red"), lty = 1)

## Feature importance ----
xgb.feature.imp <- xgb.importance(model = bst_model)
# BUG FIX: base plot() on a character vector (Feature) throws an error;
# use xgboost's own importance plot instead.
xgb.plot.importance(xgb.feature.imp)

## Validation of model ----
# BUG FIX: bst_model$bestInd does not exist (no early stopping was used),
# so the original ntreelimit argument was silently NULL. Predict with all
# trained trees, which is the same effective behavior, made explicit.
xgb.test.hist <- predict(
  bst_model,
  newdata = as.matrix(validation[, colnames(validation) != "Class"])
)
library(pROC)
auc.xgb.hist <- roc(validation$Class, xgb.test.hist, plot = TRUE, col = "blue")
print(auc.xgb.hist)