Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide and use non-NumPy histogram function making NumPy not needed anymore #95

Merged
merged 4 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions core/opl/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import math
import statistics
import time
import numpy


class WaitForDataAndSave:
Expand Down Expand Up @@ -156,6 +155,39 @@ def percentile(data, percent):
return d0 + d1


def create_bins(data, precision, bins_number=10):
    """Split the value range of ``data`` into equal-width bins.

    Args:
        data: Non-empty collection of numbers. It is scanned twice (once by
            ``min`` and once by ``max``), so a one-shot iterator will not work.
        precision: Accepted for interface compatibility; this function does
            not use it.
        bins_number: How many bins to create.

    Returns:
        A ``(bins, borders)`` tuple. ``bins`` is a list of ``(lower, upper)``
        tuples, one per bin, and ``borders`` is the list of the
        ``bins_number + 1`` bin edges running from ``min(data)`` to
        ``max(data)``. When every value in ``data`` is equal, all borders
        are that same value and every bin has zero width.

    Raises:
        ValueError: If ``data`` is empty (raised by ``min``).
        ZeroDivisionError: If ``bins_number`` is 0.
    """
    min_data = min(data)
    max_data = max(data)
    bin_size = (max_data - min_data) / bins_number

    edges = [min_data + (bin_size * x) for x in range(bins_number + 1)]
    if bins_number > 0:
        # min + bin_size * bins_number can miss the true maximum by a rounding
        # error; pin the final edge so the reported range matches the data.
        edges[-1] = max_data

    bins = list(zip(edges, edges[1:]))
    # The first border is the untouched minimum rather than ``min + 0 * size``.
    borders = [min_data] + edges[1:]
    return bins, borders


def find_bin(value, bins):
    """Return the index of the first bin containing ``value``, or -1.

    Each bin is a ``(lower, upper)`` pair treated as the half-open interval
    ``lower <= value < upper``. A value equal to the upper edge of the last
    bin is therefore not matched and yields -1.
    """
    matching = (
        position
        for position, edges in enumerate(bins)
        if edges[0] <= value < edges[1]
    )
    return next(matching, -1)


def histogram(data, precision=1):
    """Count how many values of ``data`` fall into each of the default bins.

    Bins come from ``create_bins`` with its default bin count. Every bin is
    half-open (``lower <= value < upper``) except the last one, which also
    receives any value that ``find_bin`` does not place -- in practice the
    maximum of ``data``. When all values are equal, every bin has zero width
    and all values are counted in the last bin.

    Args:
        data: Sized collection of numbers.
        precision: Passed through to ``create_bins``.

    Returns:
        A ``(counts, borders)`` tuple: the per-bin counts and the list of
        bin edges. For empty ``data`` the counts are all zero and the
        borders span 0 to 1.
    """
    if len(data) == 0:
        # numpy.histogram, which this function replaces, falls back to the
        # range 0..1 for empty input instead of failing; keep that behaviour
        # rather than letting min() raise ValueError.
        bins, borders = create_bins([0, 1], precision)
        return [0] * len(bins), borders

    bins, borders = create_bins(data, precision)
    counts = [0] * len(bins)
    last_bin = len(bins) - 1

    for value in data:
        bin_index = find_bin(value, bins)
        if bin_index == -1:
            # Not inside any half-open bin: the value sits on the closing
            # edge of the range, which belongs to the last bin.
            bin_index = last_bin
        counts[bin_index] += 1

    return counts, borders


def data_stats(data):
if len(data) == 0:
return {"samples": 0}
Expand Down Expand Up @@ -201,7 +233,7 @@ def data_stats(data):


def get_hist(data):
hist_counts, hist_borders = numpy.histogram(data)
hist_counts, hist_borders = histogram(data)
hist_counts = [float(i) for i in hist_counts]
hist_borders = [float(i) for i in hist_borders]
out = []
Expand Down
36 changes: 34 additions & 2 deletions opl/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import math
import statistics
import time
import numpy


class WaitForDataAndSave:
Expand Down Expand Up @@ -156,6 +155,39 @@ def percentile(data, percent):
return d0 + d1


def create_bins(data, precision, bins_number=10):
    """Split the value range of ``data`` into equal-width bins.

    Args:
        data: Non-empty collection of numbers. It is scanned twice (once by
            ``min`` and once by ``max``), so a one-shot iterator will not work.
        precision: Accepted for interface compatibility; this function does
            not use it.
        bins_number: How many bins to create.

    Returns:
        A ``(bins, borders)`` tuple. ``bins`` is a list of ``(lower, upper)``
        tuples, one per bin, and ``borders`` is the list of the
        ``bins_number + 1`` bin edges running from ``min(data)`` to
        ``max(data)``. When every value in ``data`` is equal, all borders
        are that same value and every bin has zero width.

    Raises:
        ValueError: If ``data`` is empty (raised by ``min``).
        ZeroDivisionError: If ``bins_number`` is 0.
    """
    min_data = min(data)
    max_data = max(data)
    bin_size = (max_data - min_data) / bins_number

    edges = [min_data + (bin_size * x) for x in range(bins_number + 1)]
    if bins_number > 0:
        # min + bin_size * bins_number can miss the true maximum by a rounding
        # error; pin the final edge so the reported range matches the data.
        edges[-1] = max_data

    bins = list(zip(edges, edges[1:]))
    # The first border is the untouched minimum rather than ``min + 0 * size``.
    borders = [min_data] + edges[1:]
    return bins, borders


def find_bin(value, bins):
    """Return the index of the first bin containing ``value``, or -1.

    Each bin is a ``(lower, upper)`` pair treated as the half-open interval
    ``lower <= value < upper``. A value equal to the upper edge of the last
    bin is therefore not matched and yields -1.
    """
    matching = (
        position
        for position, edges in enumerate(bins)
        if edges[0] <= value < edges[1]
    )
    return next(matching, -1)


def histogram(data, precision=1):
    """Count how many values of ``data`` fall into each of the default bins.

    Bins come from ``create_bins`` with its default bin count. Every bin is
    half-open (``lower <= value < upper``) except the last one, which also
    receives any value that ``find_bin`` does not place -- in practice the
    maximum of ``data``. When all values are equal, every bin has zero width
    and all values are counted in the last bin.

    Args:
        data: Sized collection of numbers.
        precision: Passed through to ``create_bins``.

    Returns:
        A ``(counts, borders)`` tuple: the per-bin counts and the list of
        bin edges. For empty ``data`` the counts are all zero and the
        borders span 0 to 1.
    """
    if len(data) == 0:
        # numpy.histogram, which this function replaces, falls back to the
        # range 0..1 for empty input instead of failing; keep that behaviour
        # rather than letting min() raise ValueError.
        bins, borders = create_bins([0, 1], precision)
        return [0] * len(bins), borders

    bins, borders = create_bins(data, precision)
    counts = [0] * len(bins)
    last_bin = len(bins) - 1

    for value in data:
        bin_index = find_bin(value, bins)
        if bin_index == -1:
            # Not inside any half-open bin: the value sits on the closing
            # edge of the range, which belongs to the last bin.
            bin_index = last_bin
        counts[bin_index] += 1

    return counts, borders


def data_stats(data):
if len(data) == 0:
return {"samples": 0}
Expand Down Expand Up @@ -201,7 +233,7 @@ def data_stats(data):


def get_hist(data):
hist_counts, hist_borders = numpy.histogram(data)
hist_counts, hist_borders = histogram(data)
hist_counts = [float(i) for i in hist_counts]
hist_borders = [float(i) for i in hist_borders]
out = []
Expand Down
71 changes: 48 additions & 23 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,71 @@


class TestSkelet(unittest.TestCase):

def test_data_stats(self):
stats = opl.data.data_stats([0, 1, 1, 2, 2, 1, 1, 0])
self.assertEqual(stats['samples'], 8)
self.assertEqual(stats['min'], 0)
self.assertEqual(stats['mean'], 1)
self.assertEqual(stats['max'], 2)
self.assertEqual(stats['range'], 2)
self.assertEqual(stats['percentile25'], 0.75)
self.assertEqual(stats['percentile75'], 1.25)
self.assertEqual(stats['iqr'], 0.5)
self.assertEqual(stats["samples"], 8)
self.assertEqual(stats["min"], 0)
self.assertEqual(stats["mean"], 1)
self.assertEqual(stats["max"], 2)
self.assertEqual(stats["range"], 2)
self.assertEqual(stats["percentile25"], 0.75)
self.assertEqual(stats["percentile75"], 1.25)
self.assertEqual(stats["iqr"], 0.5)

def test_data_stats_empty(self):
stats = opl.data.data_stats([])
self.assertEqual(stats['samples'], 0)
self.assertEqual(stats["samples"], 0)
self.assertEqual(len(stats), 1)

def test_data_stats_short(self):
stats = opl.data.data_stats([1])
self.assertEqual(stats['samples'], 1)
self.assertEqual(stats['stdev'], 0.0)
self.assertEqual(stats["samples"], 1)
self.assertEqual(stats["stdev"], 0.0)

def test_data_stats_datetime(self):
data = [
datetime.datetime.fromisoformat('2021-03-22T12:00:00.000000+00:00'),
datetime.datetime.fromisoformat('2021-03-22T11:50:00.000000+00:00'),
datetime.datetime.fromisoformat('2021-03-22T11:30:00.000000+00:00'),
datetime.datetime.fromisoformat('2021-03-22T11:00:00.000000+00:00'),
datetime.datetime.fromisoformat("2021-03-22T12:00:00.000000+00:00"),
datetime.datetime.fromisoformat("2021-03-22T11:50:00.000000+00:00"),
datetime.datetime.fromisoformat("2021-03-22T11:30:00.000000+00:00"),
datetime.datetime.fromisoformat("2021-03-22T11:00:00.000000+00:00"),
]
stats = opl.data.data_stats(data)
self.assertEqual(stats['samples'], 4)
self.assertEqual(stats['max'], datetime.datetime.fromisoformat('2021-03-22T12:00:00.000000+00:00'))
self.assertEqual(stats['min'], datetime.datetime.fromisoformat('2021-03-22T11:00:00.000000+00:00'))
self.assertEqual(stats['range'].total_seconds(), 3600)
self.assertEqual(stats["samples"], 4)
self.assertEqual(
stats["max"],
datetime.datetime.fromisoformat("2021-03-22T12:00:00.000000+00:00"),
)
self.assertEqual(
stats["min"],
datetime.datetime.fromisoformat("2021-03-22T11:00:00.000000+00:00"),
)
self.assertEqual(stats["range"].total_seconds(), 3600)

def test_get_hist(self):
    """get_hist pairs each of the ten (lower, upper) bins with its count."""
    expected_borders = [
        0.0,
        0.2,
        0.4,
        0.6000000000000001,
        0.8,
        1.0,
        1.2000000000000002,
        1.4000000000000001,
        1.6,
        1.8,
        2.0,
    ]
    expected_counts = [2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 2.0]
    expected = [
        ((lower, upper), count)
        for lower, upper, count in zip(
            expected_borders, expected_borders[1:], expected_counts
        )
    ]

    hist = opl.data.get_hist([0, 1, 1, 2, 2, 1, 1, 0])

    self.assertEqual(hist, expected)

def test_get_rps(self):
rps_vals = opl.data.get_rps([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], bucket_size=10, granularity=1)
rps_vals = opl.data.get_rps(
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], bucket_size=10, granularity=1
)
self.assertEqual(len(rps_vals), 10)
self.assertEqual(sum(rps_vals)/len(rps_vals), 1.0)
self.assertEqual(sum(rps_vals) / len(rps_vals), 1.0)

rps_vals = opl.data.get_rps([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
self.assertEqual(len(rps_vals), 5)
self.assertEqual(sum(rps_vals)/len(rps_vals), 1.0)
self.assertEqual(sum(rps_vals) / len(rps_vals), 1.0)