-
-
Notifications
You must be signed in to change notification settings - Fork 77
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Implement Accumulator Framework and Compound Accumulator for aggregation #47
Changes from all commits
53d5241
161ee09
98efeaf
8763009
d78506f
cf54652
2a3dc08
a0051da
7605b1c
6360c90
52d0577
2ca2235
7dfffb0
54144fc
b5353bf
33aed25
992e0ef
22b4077
0eb50cb
f59e2c7
ed0f632
591b620
0546975
b2f951b
411fc06
e277898
d7567f8
963d943
8d7f420
d41bef8
99ade2e
05717b4
041746c
12c08c9
daefd5f
58abd55
f029d51
3dbe745
24d9869
cb62ffd
d77c286
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import abc | ||
import typing | ||
import pickle | ||
from functools import reduce | ||
|
||
|
||
def merge(accumulators: typing.Iterable['Accumulator']) -> 'Accumulator':
    """Folds the given accumulators into a single one, left to right.

    Each element is merged into the running result by calling
    `add_accumulator` on that result, so what comes back is whatever the
    chain of `add_accumulator` calls returns.

    Args:
      accumulators: the accumulators to merge; must yield at least one
        element, otherwise `functools.reduce` raises TypeError.

    Returns:
      The accumulator produced by the last `add_accumulator` call, or the
      only element when a single accumulator is given.
    """

    def _fold(merged, following):
        return merged.add_accumulator(following)

    return reduce(_fold, accumulators)
|
||
|
||
class Accumulator(abc.ABC):
    """Base class for all accumulators.

    Accumulators are objects that encapsulate aggregations and computations of
    differentially private metrics.
    """

    @abc.abstractmethod
    def add_value(self, value):
        """Adds the value to the accumulator.

        Args:
          value: value to be added.

        Returns:
          self.
        """
        pass

    @abc.abstractmethod
    def add_accumulator(self, accumulator: 'Accumulator') -> 'Accumulator':
        """Merges the accumulator to self and returns self.

        Sub-class implementation is responsible for checking that types of
        self and accumulator are the same.

        Args:
          accumulator: the accumulator whose state is merged into self.

        Returns:
          self.
        """
        pass

    @abc.abstractmethod
    def compute_metrics(self):
        """Computes and returns the metric(s) held by this accumulator.

        The shape of the returned value is defined by the sub-class.
        """
        pass

    def serialize(self) -> bytes:
        """Returns this accumulator pickled into a bytes object."""
        return pickle.dumps(self)

    @classmethod
    def deserialize(cls, serialized_obj: bytes) -> 'Accumulator':
        """Restores an accumulator produced by `serialize`.

        Args:
          serialized_obj: the pickled bytes of an accumulator.

        Returns:
          The unpickled object, guaranteed to be an instance of `cls`.

        Raises:
          TypeError: if the unpickled object is not an instance of `cls`.
        """
        # SECURITY: pickle.loads can execute arbitrary code while loading, and
        # the isinstance check below only runs afterwards. Only pass data that
        # comes from a trusted source.
        deserialized_obj = pickle.loads(serialized_obj)
        if not isinstance(deserialized_obj, cls):
            raise TypeError("The deserialized object is not of the right type.")
        return deserialized_obj
|
||
|
||
class CompoundAccumulator(Accumulator):
    """Accumulator for computing multiple metrics.

    CompoundAccumulator contains one or more accumulators of other types for
    computing multiple metrics.
    For example it can contain [CountAccumulator, SumAccumulator].
    CompoundAccumulator delegates all operations to the internal accumulators.
    """

    def __init__(self, accumulators: typing.Iterable['Accumulator']):
        """Wraps the given accumulators.

        Args:
          accumulators: the internal accumulators, in the order in which
            their metrics are reported by `compute_metrics`.
        """
        # Materialise into a list: add_accumulator needs len() and every
        # operation iterates again, which a one-shot iterable (for example a
        # generator) cannot support.
        self.accumulators = list(accumulators)

    def add_value(self, value):
        """Adds the value to every internal accumulator.

        Args:
          value: value to be added.

        Returns:
          self.
        """
        for accumulator in self.accumulators:
            accumulator.add_value(value)
        return self

    def add_accumulator(self, accumulator: 'CompoundAccumulator') -> \
            'CompoundAccumulator':
        """Merges the accumulators of the CompoundAccumulators.

        The expectation is that the internal accumulators are of the same type
        and are in the same order.

        Args:
          accumulator: the CompoundAccumulator to merge into self.

        Returns:
          self.

        Raises:
          TypeError: if `accumulator` is not a CompoundAccumulator, or if the
            internal accumulators differ in type at some position.
          ValueError: if the numbers of internal accumulators differ.
        """
        # The base-class contract makes each sub-class responsible for
        # rejecting accumulators of a different kind.
        if not isinstance(accumulator, CompoundAccumulator):
            raise TypeError(
                "The accumulator to be added is not of the same type.")

        expected_size = len(self.accumulators)
        received_size = len(accumulator.accumulators)
        if received_size != expected_size:
            raise ValueError(
                "Accumulators in the input are not of the same size."
                + f" Expected size = {expected_size}"
                + f" received size = {received_size}.")

        pairs = list(zip(self.accumulators, accumulator.accumulators))

        # Validate every position before merging anything, so a mismatch
        # never leaves self partially merged.
        for pos, (base, to_add) in enumerate(pairs):
            if type(base) is not type(to_add):
                raise TypeError(
                    "The type of the accumulators don't match at "
                    f"index {pos}. {type(base).__name__} "
                    f"!= {type(to_add).__name__}.")

        for base, to_add in pairs:
            base.add_accumulator(to_add)
        return self

    def compute_metrics(self):
        """Computes and returns a list of metrics computed by internal
        accumulators."""
        return [
            accumulator.compute_metrics() for accumulator in self.accumulators
        ]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
"""Adapters for working with pipeline frameworks.""" | ||
|
||
import random | ||
import collections | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is probably the result of an incorrect git merge. Please revert this change. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
import numpy as np | ||
|
||
import abc | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
import unittest | ||
import pipeline_dp | ||
import typing | ||
import numpy as np | ||
from pipeline_dp.accumulator import Accumulator | ||
from pipeline_dp.accumulator import merge | ||
from pipeline_dp.accumulator import CompoundAccumulator | ||
|
||
|
||
class CompoundAccumulatorTest(unittest.TestCase):
    """Tests for CompoundAccumulator and for accumulator serialization."""

    def test_with_mean_and_sum_squares(self):
        compound = CompoundAccumulator(
            [MeanAccumulator(), SumOfSquaresAccumulator()])

        for value in (3, 4):
            compound.add_value(value)

        metrics = compound.compute_metrics()
        self.assertIsInstance(compound, CompoundAccumulator)
        self.assertEqual(len(metrics), 2)
        self.assertEqual(metrics, [3.5, 25])

    def test_adding_accumulator(self):
        base = CompoundAccumulator([
            MeanAccumulator().add_value(5),
            SumOfSquaresAccumulator().add_value(5),
        ])
        other = CompoundAccumulator(
            [MeanAccumulator(), SumOfSquaresAccumulator()])
        other.add_value(4)

        # Merge first, then keep accumulating on the merged result.
        base.add_accumulator(other)
        base.add_value(3)

        metrics = base.compute_metrics()
        self.assertEqual(len(metrics), 2)
        self.assertEqual(metrics, [4, 50])

    def test_adding_mismatched_accumulator_order_raises_exception(self):
        base = CompoundAccumulator([
            MeanAccumulator().add_value(11),
            SumOfSquaresAccumulator().add_value(1),
        ])
        # Same member types as `base`, but in the opposite order.
        swapped = CompoundAccumulator([
            SumOfSquaresAccumulator().add_value(2),
            MeanAccumulator().add_value(22),
        ])

        with self.assertRaises(TypeError) as context:
            base.add_accumulator(swapped)

        expected_message = (
            "The type of the accumulators don't match at index 0. "
            "MeanAccumulator != SumOfSquaresAccumulator.")
        self.assertEqual(expected_message, str(context.exception))

    def test_adding_mismatched_accumulator_length_raises_exception(self):
        base = CompoundAccumulator([
            MeanAccumulator().add_value(11),
            SumOfSquaresAccumulator().add_value(1),
        ])
        shorter = CompoundAccumulator([MeanAccumulator().add_value(22)])

        with self.assertRaises(ValueError) as context:
            base.add_accumulator(shorter)

        expected_message = (
            "Accumulators in the input are not of the same size. "
            "Expected size = 2 received size = 1.")
        self.assertEqual(expected_message, str(context.exception))

    def test_serialization_single_accumulator(self):
        original = MeanAccumulator().add_value(5).add_value(6)

        restored = Accumulator.deserialize(original.serialize())

        self.assertIsInstance(restored, MeanAccumulator)
        self.assertEqual(original.sum, restored.sum)
        self.assertEqual(original.count, restored.count)

    def test_serialization_compound_accumulator(self):
        original = CompoundAccumulator([
            MeanAccumulator().add_value(15),
            SumOfSquaresAccumulator().add_value(1),
        ])

        restored = Accumulator.deserialize(original.serialize())

        self.assertIsInstance(restored, CompoundAccumulator)
        self.assertEqual(len(restored.accumulators), 2)
        first, second = restored.accumulators[0], restored.accumulators[1]
        self.assertIsInstance(first, MeanAccumulator)
        self.assertIsInstance(second, SumOfSquaresAccumulator)
        self.assertEqual(restored.compute_metrics(),
                         original.compute_metrics())

    def test_serialization_with_incompatible_serialized_object(self):
        payload = MeanAccumulator().add_value(15).serialize()

        # Deserializing through a different accumulator class must fail.
        with self.assertRaises(TypeError) as context:
            SumOfSquaresAccumulator.deserialize(payload)

        self.assertEqual("The deserialized object is not of the right type.",
                         str(context.exception))
|
||
|
||
class GenericAccumulatorTest(unittest.TestCase):
    """Tests for the module-level `merge` helper."""

    def test_merge_accumulators(self):
        to_merge = [
            MeanAccumulator().add_value(15),
            MeanAccumulator().add_value(5),
        ]

        merged = merge(to_merge)

        self.assertEqual(merged.compute_metrics(), 10)

    def test_merge_diff_type_throws_type_error(self):
        mean_acc = MeanAccumulator().add_value(15)
        sum_squares_acc = SumOfSquaresAccumulator().add_value(1)

        with self.assertRaises(TypeError) as context:
            merge([mean_acc, sum_squares_acc])

        self.assertIn("The accumulator to be added is not of the same type.",
                      str(context.exception))
|
||
|
||
class MeanAccumulator(Accumulator):
    """Test accumulator that tracks a running sum and count to report a mean."""

    def __init__(
            self,
            accumulators: typing.Optional[typing.Iterable[
                'MeanAccumulator']] = None):
        """Creates an accumulator, optionally pre-filled from others.

        Args:
          accumulators: accumulators whose sums and counts are added up to
            form the initial state; None or empty starts from zero.
        """
        # Materialise once: the input is read for both the sum and the count,
        # and a one-shot iterable would be exhausted after the first pass,
        # leaving the count at zero.
        sources = list(accumulators) if accumulators else []
        if sources:
            self.sum = np.sum([source.sum for source in sources])
            self.count = np.sum([source.count for source in sources])
        else:
            self.sum = 0
            self.count = 0

    def add_value(self, v):
        """Adds `v` to the running sum and counts it; returns self."""
        self.sum += v
        self.count += 1
        return self

    def add_accumulator(self,
                        accumulator: 'MeanAccumulator') -> 'MeanAccumulator':
        """Adds the sum and count of another MeanAccumulator to self.

        Args:
          accumulator: the MeanAccumulator to merge into self.

        Returns:
          self.

        Raises:
          TypeError: if `accumulator` is not a MeanAccumulator.
        """
        if not isinstance(accumulator, MeanAccumulator):
            raise TypeError(
                "The accumulator to be added is not of the same type.")
        self.sum += accumulator.sum
        self.count += accumulator.count
        return self

    def compute_metrics(self):
        """Returns sum / count, or NaN when no value has been added."""
        if self.count == 0:
            return float('NaN')
        return self.sum / self.count
|
||
|
||
# Accumulator classes for testing
class SumOfSquaresAccumulator(Accumulator):
    """Test accumulator that keeps the sum of the squares of added values."""

    def __init__(self, accumulators: typing.Iterable[
            'SumOfSquaresAccumulator'] = None):
        """Starts from zero, or from the combined totals of `accumulators`."""
        if accumulators:
            partial_totals = [source.sum_squares for source in accumulators]
            self.sum_squares = np.sum(partial_totals)
        else:
            self.sum_squares = 0

    def add_value(self, v):
        """Adds the square of `v` to the running total; returns self."""
        squared = v * v
        self.sum_squares += squared
        return self

    def add_accumulator(
            self, accumulator: 'SumOfSquaresAccumulator'
    ) -> 'SumOfSquaresAccumulator':
        """Adds another SumOfSquaresAccumulator's total to self.

        Raises:
          TypeError: if `accumulator` is not a SumOfSquaresAccumulator.
        """
        if isinstance(accumulator, SumOfSquaresAccumulator):
            self.sum_squares += accumulator.sum_squares
            return self
        raise TypeError("The accumulator to be added is not of the same type.")

    def compute_metrics(self):
        """Returns the accumulated sum of squares."""
        return self.sum_squares
|
||
|
||
# Run the unittest test runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice implementation!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! :)