#' ---
#' title: "Gradient Descent"
#' author: "Michael Clark"
#' css: '../other.css'
#' highlight: pygments
#' date: ""
#' ---
#' Gradient descent for a standard linear regression model. The function takes
#' as arguments: starting values for the parameters to be estimated, a
#' tolerance and a maximum number of iterations to provide stopping points, a
#' step size (or a starting step size for the adaptive approach), whether to
#' print iteration progress, and whether to plot the loss over the iterations.
#'
#'
#'
#' # Data Setup
#'
#' Create some basic data for standard regression.
set.seed(8675309)
n = 1000
x1 = rnorm(n)
x2 = rnorm(n)
y = 1 + .5*x1 + .2*x2 + rnorm(n)
X = cbind(Intercept = 1, x1, x2) # model matrix
#' # Gradient Descent Algorithm
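#'
#' To sketch the idea: with the loss taken as the sum of squared residuals,
#' $L(\beta) = (X\beta - y)^\top (X\beta - y)$, the gradient is
#' $\nabla L(\beta) = 2\, X^\top (X\beta - y)$. The constant 2 is dropped in the
#' code below, since it is absorbed by the step size $\gamma$, giving the update
#' $\beta_{t+1} = \beta_t - \gamma\, X^\top (X\beta_t - y)$.
#'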
gd = function(
  par,
  X,
  y,
  tolerance = 1e-3,
  maxit     = 1000,
  stepsize  = 1e-3,
  adapt     = FALSE,
  verbose   = TRUE,
  plotLoss  = TRUE
) {
  # initialize parameters, loss, and stopping criteria
  beta = par
  names(beta) = colnames(X)

  loss = crossprod(X %*% beta - y)       # sum of squared residuals at the start
  tol  = 1
  iter = 1

  while (tol > tolerance && iter < maxit) {
    LP   = X %*% beta                    # linear predictor
    grad = t(X) %*% (LP - y)             # gradient of the squared-error loss

    betaCurrent = beta - stepsize * grad
    tol  = max(abs(betaCurrent - beta))  # max absolute parameter change
    beta = betaCurrent
    loss = append(loss, crossprod(LP - y))  # loss for the beta entering this iteration
    iter = iter + 1

    # simple adaptive step size: grow it if the loss dropped, shrink it otherwise
    if (adapt)
      stepsize = ifelse(
        loss[iter] < loss[iter - 1],
        stepsize * 1.2,
        stepsize * .8
      )

    if (verbose && iter %% 10 == 0)
      message(paste('Iteration:', iter))
  }

  if (plotLoss)
    plot(loss, type = 'l', bty = 'n')

  list(
    par    = beta,    # final parameter estimates
    loss   = loss,    # loss at each iteration
    RSE    = sqrt(crossprod(LP - y) / (nrow(X) - ncol(X))),  # residual standard error
    iter   = iter,    # number of iterations
    fitted = LP       # fitted values at the final parameters
  )
}
#' ## Run
#'
#' Set starting values.
init = rep(0, 3)
#' For any particular data set you would have to tune the `stepsize`, which
#' could be chosen via cross-validation; alternatively, one can use an
#' adaptive approach, a simple version of which is implemented in this function.
gd_result = gd(
  init,
  X = X,
  y = y,
  tolerance = 1e-8,
  stepsize  = 1e-4,
  adapt     = TRUE
)
str(gd_result)
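#'
#' As a quick sketch of why the step size matters, we can rerun the
#' non-adaptive version with a few fixed values and compare the final loss and
#' iteration counts. The step sizes here are arbitrary illustrative choices,
#' not tuned values.
sapply(c(1e-5, 1e-4, 5e-4), function(s) {
  # same data and starting values as above; suppress printing and plotting
  fit = gd(
    init,
    X = X,
    y = y,
    tolerance = 1e-8,
    stepsize  = s,       # a fixed, untuned step size for illustration
    verbose   = FALSE,
    plotLoss  = FALSE
  )
  c(stepsize = s, final_loss = tail(fit$loss, 1), iter = fit$iter)
})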
#' ## Comparison
#'
#' We can compare to standard linear regression.
rbind(
  gd = round(gd_result$par[, 1], 5),
  lm = coef(lm(y ~ x1 + x2))
)
# summary(lm(y ~ x1 + x2))
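#'
#' Both should essentially match the closed-form least squares solution,
#' $\hat\beta = (X^\top X)^{-1} X^\top y$, computed here as a sanity check.
solve(crossprod(X), crossprod(X, y))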
#' # Source
#' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/gradient_descent.R