From 5c58232cadeede4311522b016fbd41e6b62d7070 Mon Sep 17 00:00:00 2001 From: Jeffrey Wong Date: Sat, 15 Jul 2017 22:53:38 -0700 Subject: [PATCH 1/7] initial outline --- src/GLM.jl | 1 + src/glmfit.jl | 1 + src/glmtools.jl | 6 ++++++ src/linpred.jl | 5 ++++- src/lm.jl | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/GLM.jl b/src/GLM.jl index c3b69248..5af542b0 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -14,6 +14,7 @@ module GLM import Base: (\), cholfact, convert, cor, show, size import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderr, vcov, residuals, predict, fit, model_response, r2, r², adjr2, adjr², PValue import StatsFuns: xlogy + import StatsBase: FrequencyWeights, ProbabilityWeights, AnalyticWeights export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nobs, stderr, vcov, residuals, predict, fit, fit!, model_response, r2, r², adjr2, adjr² export # types diff --git a/src/glmfit.jl b/src/glmfit.jl index b9d532b1..a6ba09a6 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -1,3 +1,4 @@ +# TODO: expand weights to include fweights, pweights, aweights """ GlmResp diff --git a/src/glmtools.jl b/src/glmtools.jl index 1b4ea1aa..fc445c2b 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -98,3 +98,9 @@ loglik_obs(::Binomial, y, μ, wt, ϕ) = logpdf(Binomial(Int(wt), μ), Int(y*wt)) loglik_obs(::Gamma, y, μ, wt, ϕ) = wt*logpdf(Gamma(1/ϕ, μ*ϕ), y) loglik_obs(::Normal, y, μ, wt, ϕ) = wt*logpdf(Normal(μ, sqrt(ϕ)), y) loglik_obs(::Poisson, y, μ, wt, ϕ) = wt*logpdf(Poisson(μ), y) + +# TODO: combine fweights, pweights, aweights +function normalizeWeights(fweights::FrequencyWeights, pweights::ProbabilityWeights, + aweights::AnalyticWeights) + +end diff --git a/src/linpred.jl b/src/linpred.jl index d6f9f7c7..93567473 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -116,6 +116,8 @@ Base.cholfact!{T}(p::SparsePredChol{T}) = p.chol invchol(x::DensePred) = inv(cholfact!(x)) invchol(x::SparsePredChol) = cholfact!(x) \ eye(size(x.X, 2)) +# TODO: support fweights, pweights, and aweights +# This vcov represents σ^2 X'X vcov(x::LinPredModel) = scale!(invchol(x.pp), dispersion(x, true)) function cor(x::LinPredModel) @@ -153,7 +155,8 @@ function nobs(obj::LinPredModel) if isempty(obj.rr.wts) oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y)) else - sum(obj.rr.wts) + sum(obj.rr.wts) # This is the behavior for fweights + # TODO: add support for pweights and aweights end end diff --git a/src/lm.jl b/src/lm.jl index ff23e06e..0fc0b8da 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -1,3 +1,4 @@ +# TODO: expand weights to include fweights, pweights, aweights type LmResp{V<:FPVector} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) From 39e12662b250f3555f92a70d54f28f857912bd4d Mon Sep 17 00:00:00 2001 From: Jeffrey Wong Date: Sat, 15 Jul 2017 23:16:31 -0700 Subject: [PATCH 2/7] augment constructors to have different kinds of weights --- src/glmfit.jl | 11 +++++++++-- src/lm.jl | 18 +++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index a6ba09a6..a1bd2322 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -18,13 +18,19 @@ immutable GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp offset::V "`wts:` prior case weights. Can be of length 0." wts::V + + fweights::FrequencyWeights + pweights::ProbabilityWeights + aweights::AnalyticWeights + "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm" wrkwt::V "`wrkresid`: working residuals for IRLS" wrkresid::V end -function GlmResp{V<:FPVector, D, L}(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) +function GlmResp{V<:FPVector, D, L}(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V, + fweights::FrequencyWeights, pweights::ProbabilityWeights, aweights::AnalyticWeights) if d == Binomial() for yy in y 0. <= yy <= 1. || throw(ArgumentError("$yy in y is not in [0,1]")) @@ -39,7 +45,8 @@ function GlmResp{V<:FPVector, D, L}(y::V, d::D, l::L, η::V, μ::V, off::V, wts: "lengths of η, μ, y and wts ($nη, $nμ, $(length(wts)), $n) are not equal")) lo = length(off) lo == 0 || lo == n || error("offset must have length $n or length 0") - res = GlmResp{V,D,L}(y, d, similar(y), η, μ, off, wts, similar(y), similar(y)) + res = GlmResp{V,D,L}(y, d, similar(y), η, μ, off, wts, similar(y), similar(y), + fweights, pweights, aweights) updateμ!(res, η) res end diff --git a/src/lm.jl b/src/lm.jl index 0fc0b8da..689c172c 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -3,19 +3,31 @@ type LmResp{V<:FPVector} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) wts::V # prior weights (may have length 0) + fweights::FrequencyWeights + pweights::ProbabilityWeights + aweights::AnalyticWeights y::V # response - function (::Type{LmResp{V}}){V}(mu::V, off::V, wts::V, y::V) + function (::Type{LmResp{V}}){V}(mu::V, off::V, wts::V, y::V; + fweights::FrequencyWeights, pweights::ProbabilityWeights, + aweights::AnalyticWeights) n = length(y) length(mu) == n || error("mismatched lengths of mu and y") ll = length(off) ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0") ll = length(wts) ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0") - new{V}(mu, off, wts, y) + ll = length(fweights) + ll == 0 || ll == n || error("length of fweights is $ll, must be $n or 0") + ll = length(pweights) + ll == 0 || ll == n || error("length of pweights is $ll, must be $n or 0") + ll = length(aweights) + ll == 0 || ll == n || error("length of aweights is $ll, must be $n or 0") + new{V}(mu, off, wts, y, fweights, pweights, aweights) end end convert{V<:FPVector}(::Type{LmResp{V}}, y::V) = - LmResp{V}(zeros(y), similar(y, 0), similar(y, 0), y) + LmResp{V}(zeros(y), similar(y, 0), similar(y, 0), y, + similar(y, 0), similar(y, 0), similar(y, 0)) function convert{T<:Real}(::Type{LmResp}, y::AbstractVector{T}) yy = float(y) From a010fd109a7245322f5cadc71e8a531006901805 Mon Sep 17 00:00:00 2001 From: Jeffrey Wong Date: Sat, 15 Jul 2017 23:27:04 -0700 Subject: [PATCH 3/7] normalize weights --- src/glmtools.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/glmtools.jl b/src/glmtools.jl index fc445c2b..cf37fc73 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -102,5 +102,16 @@ loglik_obs(::Poisson, y, μ, wt, ϕ) = wt*logpdf(Poisson(μ), y) # TODO: combine fweights, pweights, aweights function normalizeWeights(fweights::FrequencyWeights, pweights::ProbabilityWeights, aweights::AnalyticWeights) - + nf = length(fweights) + np = length(pweights) + na = length(aweights) + if (nf != 0 & np == 0 & na == 0) + return fweights |> collect + else if (nf == 0 & np != 0 & na == 0) + return pweights |> collect + else if (nf == 0 & np == 0 & na != 0) + return aweights |> collect + else (nf != 0 & np != 0 & na == 0) + return (fweights .* pweights) |> collect + end end From 549045f5bf099e341e79439ce84b46bf5c8bdaed Mon Sep 17 00:00:00 2001 From: Jeffrey Wong Date: Sat, 15 Jul 2017 23:38:21 -0700 Subject: [PATCH 4/7] utilize normalizeWeights in constructor --- src/glmfit.jl | 4 +++- src/lm.jl | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index a1bd2322..d0167b7a 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -45,7 +45,9 @@ function GlmResp{V<:FPVector, D, L}(y::V, d::D, l::L, η::V, μ::V, off::V, wts: "lengths of η, μ, y and wts ($nη, $nμ, $(length(wts)), $n) are not equal")) lo = length(off) lo == 0 || lo == n || error("offset must have length $n or length 0") - res = GlmResp{V,D,L}(y, d, similar(y), η, μ, off, wts, similar(y), similar(y), + res = GlmResp{V,D,L}(y, d, similar(y), η, μ, off, + normalizeWeights(fweights, pweights, aweights), + similar(y), similar(y), fweights, pweights, aweights) updateμ!(res, η) res diff --git a/src/lm.jl b/src/lm.jl index 689c172c..feb7c4a9 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -22,7 +22,8 @@ type LmResp{V<:FPVector} <: ModResp # response in a linear model ll == 0 || ll == n || error("length of pweights is $ll, must be $n or 0") ll = length(aweights) ll == 0 || ll == n || error("length of aweights is $ll, must be $n or 0") - new{V}(mu, off, wts, y, fweights, pweights, aweights) + new{V}(mu, off, normalizeWeights(fweights, pweights, aweights), y, + fweights, pweights, aweights) end end convert{V<:FPVector}(::Type{LmResp{V}}, y::V) = From b37648e8c4226fefb229c06e6f16a4fbbf6a1f49 Mon Sep 17 00:00:00 2001 From: Jeffrey Wong Date: Sun, 16 Jul 2017 00:21:51 -0700 Subject: [PATCH 5/7] update nobs function --- src/linpred.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 93567473..3f2cc6af 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -152,10 +152,10 @@ For linear and generalized linear models, returns the number of rows, or, when prior weights are specified, the sum of weights. """ function nobs(obj::LinPredModel) - if isempty(obj.rr.wts) + if (length(obj.rr.fweights) == 0) oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y)) else - sum(obj.rr.wts) # This is the behavior for fweights + sum(obj.rr.fweights) # This is the behavior for fweights # TODO: add support for pweights and aweights end end From b1cc50159cd78a2b523b9a645b1c48cf9864a259 Mon Sep 17 00:00:00 2001 From: Jeffrey Wong Date: Sun, 13 Aug 2017 10:05:45 -0700 Subject: [PATCH 6/7] add to constructor --- src/lm.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lm.jl b/src/lm.jl index feb7c4a9..4d0d5ea1 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -1,4 +1,5 @@ # TODO: expand weights to include fweights, pweights, aweights +# Example LmResp{Array{Float64,1}}(randn(5), randn(5), randn(5), randn(5)) type LmResp{V<:FPVector} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) @@ -7,7 +8,7 @@ type LmResp{V<:FPVector} <: ModResp # response in a linear model pweights::ProbabilityWeights aweights::AnalyticWeights y::V # response - function (::Type{LmResp{V}}){V}(mu::V, off::V, wts::V, y::V; + function (::Type{LmResp{V}}){V}(mu::V, off::V, wts::V, y::V, fweights::FrequencyWeights, pweights::ProbabilityWeights, aweights::AnalyticWeights) n = length(y) From cc5b5fa7bf6e7797607d80f3ca2b0f0d3689fd1a Mon Sep 17 00:00:00 2001 From: Jeffrey Wong Date: Sun, 13 Aug 2017 10:06:03 -0700 Subject: [PATCH 7/7] handle 1 type of weight and multiple types of weights --- src/glmtools.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/glmtools.jl b/src/glmtools.jl index cf37fc73..85ca7974 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -105,13 +105,15 @@ function normalizeWeights(fweights::FrequencyWeights, pweights::ProbabilityWeigh nf = length(fweights) np = length(pweights) na = length(aweights) - if (nf != 0 & np == 0 & na == 0) + if (nf != 0 && np == 0 && na == 0) return fweights |> collect - else if (nf == 0 & np != 0 & na == 0) + elseif (nf == 0 && np != 0 && na == 0) return pweights |> collect - else if (nf == 0 & np == 0 & na != 0) + elseif (nf == 0 && np == 0 && na != 0) return aweights |> collect - else (nf != 0 & np != 0 & na == 0) + elseif (nf != 0 && np != 0) return (fweights .* pweights) |> collect + else + return zeros(Float64, 0) end end