-
Notifications
You must be signed in to change notification settings - Fork 63
/
fitFfm.R
423 lines (389 loc) · 18.5 KB
/
fitFfm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
#' @title Fit a fundamental factor model using cross-sectional regression
#'
#' @description Fit a fundamental (cross-sectional) factor model using ordinary
#' least squares or robust regression. Fundamental factor models use observable
#' asset specific characteristics (or) fundamentals, like industry
#' classification, market capitalization, style classification (value, growth)
#' etc. to calculate the common risk factors. An object of class \code{"ffm"}
#' is returned.
#'
#' @details
#' Estimation method "LS" corresponds to ordinary least squares using
#' \code{\link[stats]{lm}} and "Rob" is robust regression using
#' \code{\link[RobStatTM]{lmrobdetMM}}. "WLS" is weighted least squares using estimates
#' of the residual variances from LS regression as weights (feasible GLS).
#' Similarly, "W-Rob" is weighted robust regression.
#'
#' The weights to be used in "WLS" or "W-Rob" can be set using
#' \code{resid.scaleType} argument which computes the residual variances in one of the following ways -
#' sample variace, EWMA, Robust EWMA and GARCH(1,1). The inverse of these residual variances
#' are used as the weights. For EWMA model, lambda = 0.9 is used as default and for GARCH(1,1)
#' omega = 0.09, alpha = 0.1, and beta = 0.81 are used as default as mentioned in Martin & Ding (2017).
#' These default parameters can be changed using the arguments \code{lambda},
#' \code{GARCH.params} for EWMA and GARCH respectively. To compute GARCH
#' parameters via MLE, set \code{GARCH.MLE} to \code{TRUE}. Make sure you have
#' the rugarch package installed and loaded, as is merely listed as SUGGESTS.
#'
#' Standardizing style factor exposures: The exposures can be standardized into
#' z-scores using regular or robust (see \code{rob.stats}) measures of location
#' and scale. Further, \code{weight.var}, a variable such as market-cap, can be
#' used to compute the weighted mean exposure, and an equal-weighted standard
#' deviation of the exposures about the weighted mean. This may help avoid an
#' ill-conditioned covariance matrix. Default option equally weights exposures
#' of different assets each period.
#'
#' If \code{rob.stats=TRUE}, \code{\link[RobStatTM]{covRob}} is used to compute a
#' robust estimate of the factor covariance/correlation matrix, and,
#' \code{\link[robustbase]{scaleTau2}} is used to compute robust tau-estimates
#' of univariate scale for residuals during "WLS" or "W-Rob" regressions. When
#' standardizing style exposures, the \code{\link[stats]{median}} and
#' \code{\link[stats]{mad}} are used for location and scale respectively.
#' When \code{resid.scaleType} is EWMA or GARCH, the residual covariance is equal to the
#' diagonal matrix of the estimated residual variances in last time period.
#'
#'
#' The original function was designed by Doug Martin and initially implemented
#' in S-PLUS by a number of University of Washington Ph.D. students:
#' Christopher Green, Eric Aldrich, and Yindeng Jiang. Guy Yollin ported the
#' function to R and Yi-An Chen modified that code. Sangeetha Srinivasan
#' re-factored, tested, corrected and expanded the functionalities and S3
#' methods.
#'
#' @importFrom xts as.xts
#' @importFrom zoo as.yearmon
#' @importFrom PerformanceAnalytics checkData skewness kurtosis
#' @importFrom robustbase scaleTau2 covOGK
#' @importFrom RobStatTM lmrobdetMM covRob covClassic
#' @importFrom stats lm as.formula coef contr.treatment fitted mad median
#' model.matrix na.exclude na.fail na.omit var
#'
#' @param data data.frame of the balanced panel data containing the variables
#' \code{asset.var}, \code{ret.var}, \code{exposure.vars}, \code{date.var} and
#' optionally, \code{weight.var}.
#' @param asset.var character; name of the variable for asset names.
#' @param ret.var character; name of the variable for asset returns.
#' @param date.var character; name of the variable containing the dates
#' coercible to class \code{Date}.
#' @param exposure.vars vector; names of the variables containing the
#' fundamental factor exposures.
#' @param weight.var character; name of the variable containing the weights
#' used when standarizing style factor exposures. Default is \code{NULL}. See
#' Details.
#' @param fit.method method for estimating factor returns; one of "LS", "WLS"
#' "Rob" or "W-Rob". See details. Default is "LS".
#' @param rob.stats logical; If \code{TRUE}, robust estimates of covariance,
#' correlation, location and univariate scale are computed as appropriate (see
#' Details). Default is \code{FALSE}.
#' @param full.resid.cov logical; If \code{TRUE}, a full residual covariance
#' matrix is estimated. Otherwise, a diagonal residual covariance matrix is
#' estimated. Default is \code{FALSE}.
#' @param z.score method for exposure standardization; one of "none",
#' "crossSection", or "timeSeries". Default is \code{"none"}.
#' @param addIntercept logical; If \code{TRUE}, intercept is added in the
#' exposure matrix. Note, if 2 or more variables are categorical, this must be
#' false. Default is \code{FALSE}.
#' @param lagExposures logical; If \code{TRUE}, the style exposures in the
#' exposure matrix are lagged by one time period. Default is \code{TRUE},
#' @param resid.scaleType character; Only valid when fit.method is set to WLS or
#' W-Rob. The weights used in the weighted regression are estimated using
#' sample variance, classic EWMA, robust EWMA or GARCH model. Valid values are
#' \code{stdDev}, \code{EWMA}, \code{robEWMA}, or \code{GARCH}.Default is
#' \code{stdDev} where the inverse of residual sample variances are used as the
#' weights. If using GARCH option, make sure to install and load
#' rugarch package.
#' @param lambda lambda value to be used for the EWMA estimation of residual
#' variances. Default is 0.9
#' @param GARCH.params list containing GARCH parameters omega, alpha, and beta.
#' Default values are (0.09, 0.1, 0.81) respectively. Valid only when
#' \code{GARCH.MLE} is set to \code{FALSE}. Estimation outsourced to the
#' rugarch package, please load it first.
#' @param GARCH.MLE boolean input (TRUE|FALSE), default value = \code{FALSE}. This
#' argument allows one to choose to compute GARCH parameters by maximum
#' likelihood estimation. Estimation outsourced to the rugarch
#' package, please load it.
#' @param analysis method used in the analysis of fundamental law of active
#' management; one of "none", "ISM", or "NEW". Default is "none".
#' @param stdReturn logical; If \code{TRUE}, the returns will be standardized
#' using GARCH(1,1) volatilities. Default is \code{FALSE}. Make sure to load
#' rugarch package.
#' @param targetedVol numeric; the targeted portfolio volatility in the analysis.
#' Default is 0.06.
#' @param ... potentially further arguments passed.
#'
#' @return \code{fitFfm} returns an object of class \code{"ffm"} for which
#' \code{print}, \code{plot}, \code{predict} and \code{summary} methods exist.
#'
#' The generic accessor functions \code{coef}, \code{fitted} and
#' \code{residuals} extract various useful features of the fit object.
#' Additionally, \code{fmCov} computes the covariance matrix for asset returns
#' based on the fitted factor model.
#'
#' An object of class \code{"ffm"} is a list containing the following
#' components:
#' \item{factor.fit}{list of fitted objects that estimate factor returns in each
#' time period. Each fitted object is of class \code{lm} if
#' \code{fit.method="LS" or "WLS"}, or, class \code{lmrobdetMM} if
#' \code{fit.method="Rob" or "W-Rob"}.}
#' \item{beta}{N x K matrix of factor exposures for the last time period.}
#' \item{factor.returns}{xts object of K-factor returns (including intercept).}
#' \item{residuals}{xts object of residuals for N-assets.}
#' \item{r2}{length-T vector of R-squared values.}
#' \item{factor.cov}{K x K covariance matrix of the factor returns.}
#' \item{g.cov}{ covariance matrix of the g coefficients for a Sector plus market and Sector plus Country plus global market models .}
#' \item{resid.cov}{N x N covariance matrix of residuals.}
#' \item{return.cov}{N x N return covariance estimated by the factor model,
#' using the factor exposures from the last time period.}
#' \item{restriction.mat}{The restriction matrix used in the computation of f=Rg.}
#' \item{resid.var}{N x T matrix of estimated residual variances. It will be a length-N vector of sample residual variances when \code{resid.scaleType} is set to \code{stdDev} }
#' \item{call}{the matched function call.}
#' \item{data}{data frame object as input.}
#' \item{date.var}{date.var as input}
#' \item{ret.var}{ret.var as input}
#' \item{asset.var}{asset.var as input.}
#' \item{exposure.vars}{exposure.vars as input.}
#' \item{weight.var}{weight.var as input.}
#' \item{fit.method}{fit.method as input.}
#' \item{asset.names}{length-N vector of asset names.}
#' \item{factor.names}{length-K vector of factor.names.}
#' \item{time.periods}{length-T vector of dates.}
#' Where N is the number of assets, K is the number of factors (including the
#' intercept or dummy variables) and T is the number of unique time periods.
#' \item{activeWeights}{active weights obtaining from the fundamental law of active management}
#' \item{activeReturns}{active returns corresponding to the active weights}
#' \item{IR}{the vector of Granold-K, asymptotic IR, and finite-sample IR.}
#' Where N is the number of assets, K is the number of factors (including the
#' intercept or dummy variables) and T is the number of unique time periods.
#'
#' @author Sangeetha Srinivasan, Guy Yollin, Yi-An Chen, Avinash Acharya and Chindhanai Uthaisaad
#'
#' @references
#' Menchero, J. (2010). The Characteristics of Factor Portfolios. Journal of
#' Performance Measurement, 15(1), 52-62.
#'
#' Grinold, R. C., & Kahn, R. N. (2000). Active portfolio management (Second
#' Ed.). New York: McGraw-Hill.
#'
#' Ding, Z. and Martin, R. D. (2016). "The Fundamental Law of Active Management Redux", SSRN 2730434.
#'
#'
#' And, the following extractor functions: \code{\link[stats]{coef}},
#' \code{\link[stats]{fitted}}, \code{\link[stats]{residuals}},
#' \code{\link{fmCov}}, \code{\link{fmSdDecomp}}, \code{\link{fmVaRDecomp}}
#' and \code{\link{fmEsDecomp}}.
#'
#' \code{\link{paFm}} for Performance Attribution.
#'
#' @examples
#'
#'\dontrun{
#' # load data
#'data(stocksCRSP)
#'data(factorsSPGMI)
#'
#'stocks_factors <- selectCRSPandSPGMI(stocks = stocksCRSP, factors = factorsSPGMI,
#' dateSet = c("2006-01-31", "2010-12-31"),
#' stockItems = c("Date", "TickerLast",
#' "CapGroup", "Sector",
#' "Return", "Ret13WkBill",
#' "mktIndexCRSP"),
#' factorItems = c("BP", "LogMktCap", "SEV"),
#' capChoice = "SmallCap",
#' Nstocks = 20)
#'
#' # fit a fundamental factor model with style variables BP and LogMktCap
#'
#'fundamental_model <- fitFfm(data = stocks_factors,
#' asset.var = "TickerLast",
#' ret.var = "Return",
#' date.var = "Date",
#' exposure.vars = c("BP", "LogMktCap")
#' )
#'
#' summary(fundamental_model)
#'
#' # Fit a Fundamental Sector Factor Model with Intercept
#'
#' sector_model <- fitFfm(data = stocks_factors,
#' asset.var = "TickerLast",
#' ret.var = "Return",
#' date.var = "Date",
#' exposure.vars = c("Sector", "BP"),
#' addIntercept = TRUE)
#'
#' summary(sector_model)
#' }
#'
#'
#' @export
fitFfm <- function(data, asset.var, ret.var, date.var, exposure.vars,
weight.var = NULL,
fit.method = c("LS","WLS","Rob","W-Rob"),
rob.stats = FALSE,
full.resid.cov = FALSE,
z.score = c("none", "crossSection", "timeSeries"),
addIntercept = FALSE,
lagExposures = TRUE,
resid.scaleType = "stdDev",
lambda = 0.9,
GARCH.params = list(omega = 0.09, alpha = 0.1, beta = 0.81),
GARCH.MLE = FALSE,
stdReturn = FALSE, analysis = c("none", "ISM", "NEW"),
targetedVol = 0.06,
...) {
# record the call as an element to be returned
this.call <- match.call()
# set defaults and check input validity
if (missing(data) || !is.data.frame(data)) {
stop("Invalid args: data must be a data.frame")
}
if (!(asset.var %in% colnames(data)) || length(unique(data[[asset.var]])) < 2) {
stop("Invalid args: data must contain at least 2 assets in asset.var")
}
fit.method = fit.method[1]
if (!(fit.method %in% c("LS","WLS","Rob","W-Rob"))) {
stop("Invalid args: fit.method must be 'LS', 'WLS', 'Rob' or 'W-Rob'")
}
if (missing(asset.var) || !is.character(asset.var)) {
stop("Invalid args: asset.var must be a character string")
}
if (missing(date.var) || !is.character(date.var)) {
stop("Invalid args: date.var must be a character string")
}
if (missing(ret.var) || !is.character(ret.var)) {
stop("Invalid args: ret.var must be a character string")
}
if (missing(exposure.vars) || !is.character(exposure.vars)) {
stop("Invalid args: exposure.vars must be a character vector")
}
if (ret.var %in% exposure.vars) {
stop("Invalid args: ret.var can not also be an exposure")
}
if (!is.null(weight.var) && !is.character(weight.var)) {
stop("Invalid args: weight.var must be a character string")
}
if (!is.logical(rob.stats) || length(rob.stats) != 1) {
stop("Invalid args: control parameter 'rob.stats' must be logical")
}
if (!is.logical(full.resid.cov) || length(full.resid.cov) != 1) {
stop("Invalid args: control parameter 'full.resid.cov' must be logical")
}
if (!(resid.scaleType %in% c("stdDev","EWMA","robEWMA", "GARCH"))) {
stop("Invalid args: resid.scaleType must be 'stdDev','EWMA','robEWMA', or 'GARCH'")
}
if ((resid.scaleType != "stdDev") && !(fit.method %in% c("WLS","W-Rob"))) {
stop("Invalid args: resid.scaleType must be used with WLS or W-Rob")
}
if (!is.list(GARCH.params)) {
stop("Invalid args: parameter 'GARCH.params' must be a list")
}
if (!is.logical(stdReturn)) {
stop("Invalid args: stdReturn must be either 'TRUE' or 'FALSE' ")
}
z.score = z.score[1]
if (!(z.score %in% c("none", "crossSection", "timeSeries")) || length(z.score) != 1) {
stop("Invalid args: control parameter 'z.score' must be either crossSection or timeSeries")
}
analysis = analysis[1]
if (!(analysis %in% c("none", "ISM", "NEW")) || length(z.score) != 1) {
stop("Invalid args: control parameter 'analysis' must be either ISM or NEW")
}
# ensure dates are in required format
data[[date.var]] <- as.Date(data[[date.var]])
# extract unique time periods from data
time.periods <- unique(data[[date.var]])
TP <- length(time.periods)
if (TP < 2) {
stop("Invalid args: at least 2 unique time periods are required to fit the
factor model")
}
# order data.frame by date.var
spec1 <- specFfm(data = data, asset.var = asset.var, ret.var = ret.var,
date.var = date.var, exposure.vars = exposure.vars,
weight.var = weight.var,
addIntercept = addIntercept , rob.stats = rob.stats)
# Standardize the returns if stdReturn = TRUE
if (stdReturn) {
standardizeReturns(specObj = spec1, GARCH.params = GARCH.params)
}
# check number & type of exposure; convert character exposures to dummy vars
which.numeric <- sapply(data.frame(data)[,exposure.vars,drop=FALSE], is.numeric)
exposures.num <- exposure.vars[which.numeric]
exposures.char <- exposure.vars[!which.numeric]
if ((length(exposures.char) > 1) && !addIntercept) {
stop("Invalid args: two categorical factor model with Market(Interecept) is currently not handled")
}
if (length(exposures.char) > 2)
{
stop("Invalid args: currently supports up to two categorical variables")
}
if(lagExposures)
{
spec1 = lagExposures(spec1)
}
spec1 = standardizeExposures(specObj = spec1, Std.Type = z.score, lambda = lambda)
# fit the model
mdlFit <- fitFfmDT(ffMSpecObj = spec1, fit.method = fit.method, resid.scaleType = resid.scaleType ,
lambda = lambda, GARCH.params = GARCH.params , GARCH.MLE = GARCH.MLE , ...)
# extract regression results
regStats <- extractRegressionStats(specObj = spec1, fitResults = mdlFit, full.resid.cov = full.resid.cov)
result <- convert(SpecObj = spec1, FitObj = mdlFit, RegStatsObj = regStats)
# create list of return values.
return(result)
}
### function to calculate z-scores for numeric exposure i using weights w
## x is a data.frame object, i is a character string and w has same length as x
# rob.stats is a logical argument to compute robust location and scale
zScore <- function(x, i, w, rob.stats, z.score, asset.names) {
if (grepl(z.score, "crossSection")) {
if (rob.stats) {
x_bar <- median(w * x[[i]])
(x[[i]] - x_bar)/mad(x[[i]], center = x_bar)
} else {
x_bar <- mean(w * x[[i]])
n <- length(x[[i]])
# use equal weighted squared deviation about the weighted mean
(x[[i]] - x_bar)/sqrt(sum((x[[i]] - x_bar) ^ 2)/(n - 1))
}
} else {
N <- length(asset.names)
exposures <- matrix(w * x[[i]], nrow = N)
sigmaEWMA <- stdExpo <- exposures
meanExp <- apply(exposures, 1, mean)
sigmaExp <- apply(exposures, 1, sd)
for (j in 1:N) {
ts <- (exposures[j, ] - meanExp[j])^2
var_past_2 <- sigmaExp[j] ^ 2
sigmaEWMA[j, ] <- sapply(ts, function(x) var_past_2 <<- 0.10 * x + 0.90 * var_past_2)
if (any(sigmaEWMA[j, ] == 0)) {
sigmaEWMA[j, ] <- 1
}
}
as.vector((exposures - meanExp) / sqrt(sigmaEWMA))
}
}
#' @param object a fit object of class \code{ffm} which is returned by
#' \code{fitFfm}
#' @rdname fitFfm
#' @method coef ffm
#' @export
coef.ffm <- function(object, ...) {
# these are the last period factor exposures
# already computed through fitFfm
return(object$beta)
}
#' @rdname fitFfm
#' @method fitted ffm
#' @export
fitted.ffm <- function(object, ...) {
# get fitted values for all assets in each time period
# transpose and convert into xts/zoo objects
fitted.xts <- PerformanceAnalytics::checkData(t(sapply(object$factor.fit, fitted)))
names(fitted.xts) <- object$asset.names
return(fitted.xts)
}
#' @rdname fitFfm
#' @method residuals ffm
#' @export
residuals.ffm <- function(object, ...) {
return(object$residuals)
}