This repository has been archived by the owner on Sep 21, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 33
/
linreg_noisy_stats.rs
137 lines (105 loc) · 4.53 KB
/
linreg_noisy_stats.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
use core::iter::Iterator;
/// Beginnings of DP Linear Regression
/// Borrowing heavily from the crate 'linreg'
use smartnoise_validator::errors::*;
use smartnoise_validator::Float;
use crate::utilities::mechanisms::laplace_mechanism;
/// Calculates "NoisyStat", which adds Laplace noise to the OLS sufficient statistics
///
fn noisy_stats_linreg(
data_x: Vec<Float>, data_y: Vec<Float>,
epsilon: Float, enforce_constant_time: bool,
) -> Result<(Float, Float)> {
if data_x.len() != data_y.len() {
return Err("predictors and targets must share same length".into())
}
let data_size: Float = data_x.len() as Float;
let mean = |data: &Vec<Float>| data.iter().sum::<Float>() / data.len() as Float;
let x_mean = mean(&data_x);
let y_mean = mean(&data_y);
let delta: Float = 1.0 - 1.0 / data_size;
let laplace_1: Float = laplace_mechanism(epsilon, 3.0 * delta, enforce_constant_time)?;
let laplace_2: Float = laplace_mechanism(epsilon, 3.0 * delta, enforce_constant_time)?;
let xxm2: f64 = data_x.iter()
.map(|x| (x - x_mean).powi(2))
.sum();
let xmym2: f64 = data_x.iter().zip(data_y)
.map(|(x, y)| (x - x_mean) * (y - y_mean))
.sum();
let slope = (xmym2 + laplace_1) / (xxm2 + laplace_2);
let delta_2 = (1.0 / data_size) * (1.0 + slope.abs());
let laplace_3: Float = laplace_mechanism(epsilon, 3.0 * delta_2, enforce_constant_time)?;
let intercept = y_mean - slope * x_mean + laplace_3;
// we check for divide-by-zero after the fact
if !slope.is_finite() {
return Err("Too Steep".into());
}
Ok((slope, intercept))
}
/// Calculate noisy linreg, then return "quartiles" consistent with implementation from paper
///
pub fn noisy_stats(
data_x: Vec<Float>, data_y: Vec<Float>,
epsilon: Float, enforce_constant_time: bool,
) -> Result<(Float, Float)> {
let (slope, intercept) = noisy_stats_linreg(data_x, data_y, epsilon, enforce_constant_time)?;
Ok((0.25 * slope + intercept, 0.75 * slope + intercept))
}
#[cfg(test)]
mod tests {
use std::vec::Vec;
use super::*;
#[test]
#[allow(unused_must_use)]
#[should_panic]
fn unequal_x_and_y_test() {
let x: Vec<Float> = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let y: Vec<Float> = vec![2.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0];
let epsilon = 0.1;
let enforce_constant_time = false;
noisy_stats_linreg(x, y, epsilon, enforce_constant_time).unwrap();
}
#[test]
fn noisy_stats_completes_test() {
let x: Vec<Float> = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let y: Vec<Float> = vec![2.0, 4.0, 5.0, 6.0, 7.0];
let epsilon = 0.1;
let enforce_constant_time = false;
let result = noisy_stats_linreg(x, y, epsilon, enforce_constant_time);
// This is, admittedly, not the greatest test, but it does ensure that noisy_stats
// is returning values without panicking.
assert!(!result.is_err());
}
#[test]
fn test_large_epsilon_test() {
for epsilon in [1.0, 10.0, 100.0, 10000000.0].iter() {
// Create data which describes y = 2x
let x: Vec<Float> = (0..1000).map(Float::from).collect::<Vec<Float>>();
let y: Vec<Float> = (0..1000).map(|x| 2 * x).map(Float::from).collect::<Vec<Float>>();
let true_slope = 2.0;
let true_intercept = 0.0;
let enforce_constant_time = false;
let (slope, intercept) = noisy_stats_linreg(x, y, *epsilon, enforce_constant_time).unwrap();
let slope_diff = (slope - true_slope).abs();
let intercept_diff = (intercept - true_intercept).abs();
// println!("{} {} {}", slope_diff, intercept_diff, 1.0/epsilon);
assert!(slope_diff < 1.0 / epsilon);
assert!(intercept_diff < 1.0 / epsilon);
}
}
#[test]
fn quartiles_test() {
let x: Vec<Float> = (0..1000).map(Float::from).collect::<Vec<Float>>();
let y: Vec<Float> = (0..1000).map(|x| 2 * x).map(Float::from).collect::<Vec<Float>>();
let true_slope = 2.0;
let true_intercept = 0.0;
let base_p25 = 0.25 * true_slope + true_intercept;
let base_p75 = 0.75 * true_slope + true_intercept;
let epsilon = 10.0;
let enforce_constant_time = false;
let (p_25, p_75) = noisy_stats(x, y, epsilon, enforce_constant_time).unwrap();
// println!("{} {} {}", base_p75, p_75, 1.0/epsilon);
assert!((base_p25 - p_25).abs() < 1.0 / epsilon);
assert!((base_p75 - p_75).abs() < 1.0 / epsilon);
}
}