-
Notifications
You must be signed in to change notification settings - Fork 1
/
statlib.py
163 lines (125 loc) · 4.85 KB
/
statlib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import os, sys
import numpy as np
from scipy import stats, linalg
from math import ceil
def RegLinWeightedMat(x, y, w,conf_interv=0.99, conf_slope = 0.95):
X = x*1.0
Y = y *1.0
W = w * 1.0
Y[np.isnan(W) | np.isnan(X)] = np.nan #check for NaNs
# X[np.isnan(W) | np.isnan(Y)] = np.nan #check for NaNs
W[np.isnan(Y) | np.isnan(X)] = np.nan #check for NaNs
sum_w = np.nansum(W, axis = 0)
moy_X_w = np.nansum(X*W, axis = 0)/sum_w
moy_Y_w = np.nansum(Y*W, axis = 0)/sum_w
mat_cross_product = W*(X-moy_X_w)*(Y-moy_Y_w)
sum_mat_cross_product = np.nansum(mat_cross_product, axis = 0)
mat_X_squared = W*(X-moy_X_w)**2
sum_mat_X_squared = np.nansum(mat_X_squared, axis = 0)
beta1 = sum_mat_cross_product/sum_mat_X_squared
beta0 = moy_Y_w - beta1*moy_X_w
alpha_interv=1.-conf_interv
alpha_slope = 1.-conf_slope
Y_pred = beta1*X+beta0
n = np.sum(~np.isnan(X),axis=0)
SSX = sum_mat_X_squared
SXY = np.sqrt(np.nansum(W*(Y-Y_pred)**2, axis = 0)/(n-2))
SE_slope = SXY/np.sqrt(SSX)
hi = 1./n+(X-moy_X_w)**2/SSX
# quantile of student's t distribution for p=1-alpha/2
q_interv = stats.t.ppf(1.-alpha_interv/2, n-2)
q_slope = stats.t.ppf(1.-alpha_slope/2, n-2)
# get the upper and lower CI:
dy = q_interv*SXY*np.sqrt(hi)
Yl = Y_pred-dy
Yu = Y_pred+dy
# calculate incert on slope
incert_slope = q_slope*SE_slope
return beta1, beta0, incert_slope, Yl, Yu
def kernel_exclass(xi,x0,a1,kappa=0.5):
return np.exp(-(np.abs(xi - x0 )/a1) ** kappa)
def kernel_exp(xi,x0,a1):
return np.exp(-np.abs(xi-x0)/a1)
def kernel_gaussian(xi,x0,a1):
return np.exp(-((xi-x0)/a1)**2)
#TODO: kernel spherical?
def lowess_homemade_kern(x, y, w, a1,kernel='Exp'):
"""
#inspired by: https://xavierbourretsicotte.github.io/loess.html
homebaked lowess with variogram kernel + heteroscedasticity of observations with error
:param x:
:param y:
:param w: heteroscedastic weights (inverse of variance)
:param a1: range of the kernel (in variogram terms)
:param kernel: kernel function
:return:
"""
n = len(x)
yest = np.zeros(n)
err_yest = np.zeros(n)
if kernel == 'Gau':
kernel_fun = kernel_gaussian
elif kernel =='Exp':
kernel_fun = kernel_exp
elif kernel == 'Exc':
kernel_fun = kernel_exclass
else:
print('Kernel not recognized.')
sys.exit()
# Initializing all weights from the bell shape kernel function
W = np.array([kernel_fun(x,x[i],a1)*w for i in range(n)]).T
# # Looping through all x-points
# for i in range(n):
# weights = w[:, i]
# b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
# A = np.array([[np.sum(weights), np.sum(weights * x)],
# [np.sum(weights * x), np.sum(weights * x * x)]])
# theta = linalg.solve(A, b)
# yest[i] = theta[0] + theta[1] * x[i]
X = np.array([x for i in range(n)]).T
Y = np.array([y for i in range(n)]).T
beta1, beta0, _, Yl, Yu = RegLinWeightedMat(X,Y,W,conf_interv=0.68)
for i in range(n):
yest[i] = beta1[i]*x[i]+beta0[i]
err_yest[i] = (Yu[i,i] - Yl[i,i])/2
return yest, err_yest
def lowess_ag(x, y, f=2. / 3., iter=3):
"""lowess(x, y, f=2./3., iter=3) -> yest
Lowess smoother: Robust locally weighted regression.
The lowess function fits a nonparametric regression curve to a scatterplot.
The arrays x and y contain an equal number of elements; each pair
(x[i], y[i]) defines a data point in the scatterplot. The function returns
the estimated (smooth) values of y.
The smoothing span is given by f. A larger value for f will result in a
smoother curve. The number of robustifying iterations is given by iter. The
function will run faster with a smaller number of iterations.
"""
n = len(x)
r = int(ceil(f * n))
h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
w = (1 - w ** 3) ** 3
yest = np.zeros(n)
delta = np.ones(n)
for iteration in range(iter):
for i in range(n):
weights = delta * w[:, i]
b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
A = np.array([[np.sum(weights), np.sum(weights * x)],
[np.sum(weights * x), np.sum(weights * x * x)]])
beta = linalg.solve(A, b)
yest[i] = beta[0] + beta[1] * x[i]
residuals = y - yest
s = np.median(np.abs(residuals))
delta = np.clip(residuals / (6.0 * s), -1, 1)
delta = (1 - delta ** 2) ** 2
return yest
x = np.linspace(0,1,100)
noise = np.random.normal(loc = 0, scale = .25, size = 100)
y = np.sin(x * 1.5 * np.pi )
y_noise = y + noise
if __name__ == '__main__':
x=elev_bin
y=mean_bin
w=1/(nonvoid_err_bin**2)
a1=200.