-
Notifications
You must be signed in to change notification settings - Fork 82
/
standard_lm.R
167 lines (128 loc) · 4.23 KB
/
standard_lm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#' ---
#' title: "Standard Linear Models"
#' author: "Michael Clark"
#' css: '../other.css'
#' highlight: pygments
#' date: ""
#' ---
#'
#'
#' # Introduction
#'
#' A standard regression model via maximum likelihood or least squares loss.
#' Also examples for QR decomposition and normal equations. Can serve as an
#' entry point for those starting out in the wider world of computational
#' statistics as maximum likelihood is the fundamental approach used in most
#' applied statistics, but which is also a key aspect of the Bayesian approach.
#' Least squares loss is not confined to the standard lm setting, but is widely
#' used in more predictive/'algorithmic' approaches e.g. in machine learning and
#' elsewhere.
#'
#' # Data Setup
#'
set.seed(123)  # fix the RNG state so results replicate exactly

# Simulate N observations on k standard-normal predictors.
N = 100   # sample size
k = 2     # number of desired predictors
X = matrix(rnorm(N * k), ncol = k)

# Gaussian response with true intercept -0.5, slopes 0.2 and 0.1, and
# noise sd 0.5; increasing N pulls the estimates toward these values.
y = -0.5 + 0.2 * X[, 1] + 0.2 * 0 + 0.1 * X[, 2] + rnorm(N, sd = 0.5)

# Bundle predictors and response for the formula-interface fits below.
dfXy = data.frame(X, y)
#'
#' # Functions
#'
#' A maximum likelihood approach.
# Negative log likelihood of the standard linear model, for minimization
# with optim.
#
# par: parameter vector; par[1] is the error variance (sigma2), the
#      remaining elements are the regression coefficients
# X:   predictor matrix with intercept column
# y:   response vector
#
# Returns the negative log likelihood (a scalar), or Inf for invalid
# (non-positive) variance proposals.
lm_ML = function(par, X, y) {
  # setup
  beta   = par[-1] # coefficients
  sigma2 = par[1]  # error variance

  # optim's default search is unconstrained, so it may propose a
  # non-positive variance; return Inf so such proposals are rejected
  # rather than producing NaN via sqrt() of a negative number.
  if (sigma2 <= 0) return(Inf)

  sigma = sqrt(sigma2)
  N = nrow(X)      # used by the alternate log-likelihood form below

  # linear predictor
  LP = X %*% beta
  mu = LP          # identity link in the glm sense

  # per-observation log likelihood
  L = dnorm(y, mean = mu, sd = sigma, log = TRUE)
  # L = -.5*N*log(sigma2) - .5*(1/sigma2)*crossprod(y-mu) # alternate log likelihood form

  -sum(L) # optim by default is minimization, and we want to maximize the likelihood
          # (see also fnscale in optim.control)
}
# An approach via least squares loss function.
# Least squares loss (residual sum of squares) of the standard linear
# model, for minimization with optim.
#
# par: regression coefficients to be estimated
# X:   predictor matrix with intercept column
# y:   response vector
#
# Returns the sum of squared residuals as a plain scalar.
lm_LS = function(par, X, y) {
  # setup
  beta = par # coefficients

  # linear predictor
  LP = X %*% beta
  mu = LP    # identity link

  # Sum of squared residuals. drop() collapses the 1x1 crossprod matrix to
  # a scalar; the original ended with an assignment (`L = ...`), which only
  # returned the value invisibly via a dead store — return it explicitly.
  drop(crossprod(y - mu))
}
#' # Obtain Model Estimates
#'
#' Setup for use with optim.
X = cbind(1, X)  # prepend the intercept column so X matches the coefficient vector

#' Initial values. Note we'd normally want to handle the sigma differently as
#' it's bounded by zero, but we'll ignore for demonstration. Also sigma2 is not
#' required for the LS approach as it is the objective function.
init = setNames(c(1, rep(0, ncol(X))), c("sigma2", "intercept", "b1", "b2"))

# Maximum likelihood fit: minimize the negative log likelihood.
optlmML = optim(
  fn  = lm_ML,
  par = init,
  X   = X,
  y   = y,
  control = list(reltol = 1e-8)
)

# Least squares fit: minimize the residual sum of squares directly
# (no sigma2 parameter, hence init[-1]).
optlmLS = optim(
  fn  = lm_LS,
  par = init[-1],
  X   = X,
  y   = y,
  control = list(reltol = 1e-8)
)

pars_ML = optlmML$par
# The LS objective at the minimum is the RSS; divide by the residual
# degrees of freedom to recover an estimate of sigma2.
pars_LS = c(sigma2 = optlmLS$value / (N - k - 1), optlmLS$par)
#' # Comparison
#'
#' Compare to `lm` which uses QR decomposition.
modlm = lm(y ~ ., dfXy)
#' Example
#'
# The QR route that lm takes internally, spelled out (not run):
# QRX  = qr(X)
# Q    = qr.Q(QRX)
# R    = qr.R(QRX)
# Bhat = solve(R) %*% crossprod(Q, y)
# alternate: qr.coef(QRX, y)

# Stack the three sets of estimates (sigma2 first, then coefficients)
# for a side-by-side look.
round(
  rbind(
    pars_ML,
    pars_LS,
    modlm = c(summary(modlm)$sigma^2, coef(modlm))
  ),
  digits = 3
)
#' The slight difference in sigma comes from maximum likelihood dividing by N
#' vs. N - k - 1 in the traditional least squares approach; it diminishes with
#' increasing N, as both tend toward whatever sd^2 you specify when creating
#' the y response above.
#'
#' Compare to glm, which by default assumes a gaussian family with identity
#' link and uses `lm.fit`.
#'
# glm with its defaults fits the same model as lm here — the gaussian family
# with identity link is the default, in which case glm delegates to lm.fit.
modglm = glm(y ~ ., data = dfXy)
summary(modglm)
#' Via normal equations.
# Solve the normal equations X'X b = X'y as a linear system. Using
# solve(A, b) with crossprod avoids forming an explicit inverse, which is
# both cheaper and more numerically stable than
# solve(t(X) %*% X) %*% t(X) %*% y.
coefs = solve(crossprod(X), crossprod(X, y)) # coefficients
#' Compare.
#'
# Residual standard error reconstructed from the normal-equations fit:
# sqrt(RSS / residual degrees of freedom).
sqrt(crossprod(y - X %*% coefs) / (N - k - 1))
# lm's reported residual standard error, for comparison.
summary(modlm)$sigma
# For the gaussian family, glm stores the RSS as the deviance.
sqrt(modglm$deviance / modglm$df.residual)
# sigma implied by the optim-based ML and LS estimates of sigma2 above.
c(sqrt(pars_ML[1]), sqrt(pars_LS[1]))
# rerun by adding 3-4 zeros to the N
#' # Source
#' Base R source code found at https://github.com/m-clark/Miscellaneous-R-Code/blob/master/ModelFitting/standard_lm.R