using XGBoost
# load data from a LibSVM text file; a binary buffer generated by xgboost also works
const DATAPATH = joinpath(@__DIR__, "../data")
dtrain = DMatrix(joinpath(DATAPATH, "agaricus.txt.train"))
dtest = DMatrix(joinpath(DATAPATH, "agaricus.txt.test"))
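# A DMatrix can also be built from an in-memory dense matrix. This is a
# minimal sketch; it assumes the dense-matrix constructor with a `label`
# keyword, as used in the other demos:
#   X = rand(100, 4); y = rand(0:1, 100)
#   dtmp = DMatrix(X, label = y)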
# define the parameters for xgboost (a Dict, so fpreproc below can mutate it)
param = Dict("max_depth" => 2,
             "eta" => 1,
             "silent" => 1,
             "objective" => "binary:logistic")
num_round = 2
nfold = 5
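# For reference, the same parameter Dict drives a plain training run as well.
# A sketch only; it assumes the `xgboost` convenience trainer accepts `param`
# the same way `nfold_cv` does:
#   bst = xgboost(dtrain, num_round, param = param)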
print("running cross validation\n")
# run cross validation; this prints results as
# [iteration] metric_name:mean_value+std_value
# where std_value is the standard deviation of the metric across folds
nfold_cv(dtrain, num_round, nfold, param = param, metrics = ["error"], seed = 0)
print("running cross validation, disable standard deviation display\n")
# do cross validation, this will print result out as
# [iteration] metric_name:mean_value+std_value
# std_value is standard deviation of the metric
nfold_cv(dtrain, num_round, nfold, param = param, metrics = ["error"], seed = 0, show_stdv = false)
print("running cross validation, with preprocessing function\n")
# define the preprocessing function
# it returns the preprocessed training data, test data, and parameters
# we can use this to rescale weights, etc.
# as an example, we set scale_pos_weight from the label ratio
function fpreproc(dtrain::DMatrix, dtest::DMatrix, param)
    label = get_info(dtrain, "label")
    ratio = sum(label .== 0) / sum(label .== 1)
    param["scale_pos_weight"] = ratio
    return dtrain, dtest, param
end
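# quick sanity check of fpreproc on the full data (illustrative only; during
# cross validation nfold_cv calls it once per fold on that fold's slices)
_, _, p = fpreproc(dtrain, dtest, copy(param))
println("scale_pos_weight = ", p["scale_pos_weight"])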
# run cross validation; for each fold the dtrain, dtest, and param
# are passed into fpreproc, and its return values are used to
# generate the results of that fold
nfold_cv(dtrain, num_round, nfold, param = param, metrics = ["auc"],
seed = 0, show_stdv = false, fpreproc = fpreproc)
print("running cross validation, with customized loss function\n")
###
# you can also do cross validation with a customized loss function
# see custom_objective.jl
###
function logregobj(preds::Vector{Float32}, dtrain::DMatrix)
    labels = get_info(dtrain, "label")
    preds = 1.0 ./ (1.0 .+ exp.(-preds))
    grad = preds .- labels
    hess = preds .* (1.0 .- preds)
    return grad, hess
end
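# sanity check (illustrative): at zero margin the predicted probability is 0.5,
# so every hessian entry should be 0.5 * (1 - 0.5) = 0.25
n = length(get_info(dtrain, "label"))
g, h = logregobj(zeros(Float32, n), dtrain)
@assert all(isapprox.(h, 0.25))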
function evalerror(preds::Vector{Float32}, dtrain::DMatrix)
    labels = get_info(dtrain, "label")
    # return a pair (metric_name, result)
    # preds are margins (before the logistic transformation), so cut off at 0
    return "self-error", sum((preds .> 0.0) .!= labels) / length(preds)
end
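# quick check (illustrative): with all-zero margins every row is predicted
# negative, so the reported error equals the positive-label fraction
name, err = evalerror(zeros(Float32, n), dtrain)
println(name, " on all-zero margins: ", err)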
# run cross validation with the customized objective and evaluation metric
nfold_cv(dtrain, num_round, nfold, metrics = [], seed = 0, obj = logregobj,
         feval = evalerror, max_depth = 2, eta = 1, silent = 1)