"""test_linear_regression.py: rawsight linear regression checked against scikit-learn and TensorFlow."""
import numpy as np
from rawsight.datasets import load_housing_data, Dataset
from typing import Callable
from sklearn.linear_model import SGDRegressor
from rawsight.cost_functions import least_squares_cost_function
from rawsight.cost_functions.cost_functions import _least_squares_cost
from rawsight.optimizers import (
batch_gradient_descent,
regularized_batch_gradient_descent,
)
from rawsight.models import LinearModel, Model
from rawsight.regression import run_regression
from functools import partial
import pytest
# Signature of the fitting routines used in this file: called as
# ``optimizer(X_train, y_train, model=model)`` and annotated to return a ``Model``.
Optimizer = Callable[..., Model]
def run_linear_regression(dataset: Dataset, optimizer: Optimizer) -> Model:
    """Fit a freshly initialised ``LinearModel`` on the training split.

    Args:
        dataset: Object exposing ``X_train`` and ``y_train``.
        optimizer: Callable invoked as ``optimizer(X, y, model=...)``.

    Returns:
        Whatever ``optimizer`` returns.
    """
    features, targets = dataset.X_train, dataset.y_train
    # n_features is taken from the second dimension of the training matrix.
    initial_model = LinearModel(w=1, b=0, n_features=features.shape[1])
    return optimizer(features, targets, model=initial_model)
@pytest.fixture
def housing_data() -> Dataset:
    """Load the housing dataset and call ``normalize_features()`` on it.

    The fixture uses ``return`` rather than ``yield``: there is no teardown
    step, and a generator function would contradict the ``-> Dataset``
    annotation.
    """
    dataset = load_housing_data()
    dataset.normalize_features()
    return dataset
@pytest.fixture
def sgdr(housing_data):
    """Return an ``SGDRegressor`` fitted on the housing training split."""
    # Named differently from the fixture so the local does not shadow it.
    reference_model = SGDRegressor(max_iter=1000)
    reference_model.fit(housing_data.X_train, housing_data.y_train)
    return reference_model
@pytest.fixture
def linear_regression(housing_data) -> Model:
    """Return a linear model fitted with plain batch gradient descent."""
    hyperparameters = {
        "cost_function": least_squares_cost_function,
        "learning_rate": 0.03,
        "max_iter": 20000,
    }
    optimizer = partial(batch_gradient_descent, **hyperparameters)
    return run_linear_regression(housing_data, optimizer)
@pytest.fixture
def linear_regression_regularized(housing_data) -> Model:
    """Return a linear model fitted with regularized batch gradient descent."""
    hyperparameters = {
        "cost_function": least_squares_cost_function,
        "learning_rate": 0.01,
        "max_iter": 20000,
        "regularization_param": 0.15,
    }
    optimizer = partial(regularized_batch_gradient_descent, **hyperparameters)
    return run_linear_regression(housing_data, optimizer)
def compare_to_sklearn(input_model: Model, sgdr_: SGDRegressor) -> None:
    """Assert the model's parameters lie within 1.0 of the sklearn fit.

    ``parameters[0]`` is compared with ``sgdr_.coef_`` and ``parameters[1]``
    with ``sgdr_.intercept_``, both with an absolute tolerance of 1.
    """
    weights = input_model.parameters[0]
    print(weights)
    assert isinstance(weights, np.ndarray)

    expected_weights = np.array(sgdr_.coef_)
    assert pytest.approx(weights, abs=1) == expected_weights

    # The second parameter may be a scalar; promote it so it compares as an array.
    bias = np.atleast_1d(input_model.parameters[1])
    expected_bias = np.array(sgdr_.intercept_)
    assert pytest.approx(bias, abs=1) == expected_bias
def test_linear_regression_parameters_vs_sklearn(
    linear_regression, linear_regression_regularized, sgdr
):
    """Both fitted models must match the sklearn reference parameters."""
    # Plain model first, then the regularized one.
    for candidate in (linear_regression, linear_regression_regularized):
        compare_to_sklearn(input_model=candidate, sgdr_=sgdr)
def test_regularized_better(
    housing_data, sgdr, linear_regression, linear_regression_regularized
):
    """Check the regularized cost does not exceed the unregularized one.

    Each cost is ``_least_squares_cost`` evaluated on a model's first
    parameter and the sklearn coefficients. ``housing_data`` is requested as a
    fixture but not used directly in the body.
    """
    for fitted in (linear_regression, linear_regression_regularized):
        assert isinstance(fitted, LinearModel)

    plain_cost = _least_squares_cost(linear_regression.parameters[0], sgdr.coef_)
    regularized_cost = _least_squares_cost(
        linear_regression_regularized.parameters[0], sgdr.coef_
    )
    assert regularized_cost <= plain_cost
def test_parity_to_tensorflow():
    """Check ``LinearRegression.predict`` against a Keras linear ``Dense`` layer.

    Both are given the same weights; the predictions must agree to within an
    absolute tolerance of 0.01.
    """
    # Imported locally so the rest of the module does not import TensorFlow.
    import tensorflow as tf
    from rawsight.datasets import load_tumor_simple
    from rawsight.regression import LinearRegression

    dataset = load_tumor_simple()
    linear_layer = tf.keras.layers.Dense(
        units=1, input_dim=1, activation="linear", name="linear"
    )
    # Calling the layer first means its weights exist before they are read.
    p_lin = linear_layer(dataset.X_train)

    # Fetch the weights once instead of calling get_weights() per parameter.
    kernel, bias = linear_layer.get_weights()
    linear_regression = LinearRegression(
        x=dataset.X_train,
        y=dataset.y_train,
        w=kernel,
        b=bias,
    )

    # A vectorized comparison reports the mismatching values on failure.
    # rtol=0 keeps the check purely absolute, and equal_nan=False keeps NaN
    # predictions failing, as they did with the element-wise ``<=`` check.
    np.testing.assert_allclose(
        linear_regression.predict().reshape(-1, 1),
        p_lin.numpy(),
        rtol=0,
        atol=0.01,
        equal_nan=False,
    )
if __name__ == "__main__":
    # The tests take pytest fixtures as arguments, so calling them directly
    # with no arguments raises TypeError. Let pytest collect and run this file
    # and propagate its exit status.
    raise SystemExit(pytest.main([__file__]))