Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ignore hours #82

Merged
merged 4 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions openshift_metrics/invoice.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import math
from dataclasses import dataclass, field
from collections import namedtuple
from typing import List
from typing import List, Tuple, Optional
from decimal import Decimal, ROUND_HALF_UP
import datetime

Expand Down Expand Up @@ -112,15 +112,32 @@ def get_service_unit(self) -> ServiceUnit:

return ServiceUnit(su_type, su_count, determining_resource)

def get_runtime(
    self,
    ignore_times: Optional[List[Tuple[datetime.datetime, datetime.datetime]]] = None,
) -> Decimal:
    """Return runtime eligible for billing in hours.

    Args:
        ignore_times: Optional ``(start, end)`` datetime pairs whose overlap
            with the pod's lifetime must not be billed. Ranges may overlap
            each other or extend beyond the pod's lifetime.

    Returns:
        The pod duration minus the ignored time, expressed in hours. The
        result is never negative.
    """
    if not ignore_times:
        return Decimal(self.duration) / 3600

    pod_start = self.start_time
    pod_end = self.end_time

    # Clip every ignore range to the pod's lifetime and drop the ones that
    # do not intersect it at all.
    clipped = []
    for ignore_start_date, ignore_end_date in ignore_times:
        window_start = max(pod_start, int(ignore_start_date.timestamp()))
        window_end = min(pod_end, int(ignore_end_date.timestamp()))
        if window_start < window_end:
            clipped.append((window_start, window_end))

    # Sweep the sorted windows so that time shared by several ignore ranges
    # is subtracted only once instead of once per range.
    ignored_seconds = 0
    covered_until = pod_start
    for window_start, window_end in sorted(clipped):
        if window_end <= covered_until:
            continue
        ignored_seconds += window_end - max(window_start, covered_until)
        covered_until = window_end

    return Decimal(max(0, self.duration - ignored_seconds)) / 3600

@property
def end_time(self) -> int:
    """Return the pod's start time plus its duration."""
    started_at = self.start_time
    return started_at + self.duration

def generate_pod_row(self):
def generate_pod_row(self, ignore_times):
"""
This returns a row to represent pod data.
It converts the epoch_time stamps to datetime timestamps so it's more readable.
Expand All @@ -136,7 +153,7 @@ def generate_pod_row(self):
memory_request = self.memory_request.quantize(
Decimal(".0001"), rounding=ROUND_HALF_UP
)
runtime = self.get_runtime().quantize(Decimal(".0001"), rounding=ROUND_HALF_UP)
runtime = self.get_runtime(ignore_times).quantize(Decimal(".0001"), rounding=ROUND_HALF_UP)
return [
self.namespace,
start_time,
Expand Down Expand Up @@ -177,6 +194,7 @@ class ProjectInvoce:
intitution: str
institution_specific_code: str
rates: Rates
ignore_hours: Optional[List[Tuple[datetime.datetime, datetime.datetime]]] = None
su_hours: dict = field(
default_factory=lambda: {
SU_CPU: 0,
Expand All @@ -192,7 +210,7 @@ class ProjectInvoce:
def add_pod(self, pod: Pod) -> None:
    """Add one pod's service-unit hours to this invoice's running totals.

    The pod's runtime is computed with this invoice's ``ignore_hours`` so
    ignored ranges are excluded from the accumulated SU hours.
    """
    su_type, su_count, _determining_resource = pod.get_service_unit()
    billable_hours = pod.get_runtime(self.ignore_hours)
    su_hours_for_pod = su_count * billable_hours
    self.su_hours[su_type] += su_hours_for_pod

def get_rate(self, su_type) -> Decimal:
Expand Down
30 changes: 27 additions & 3 deletions openshift_metrics/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
"""

import argparse
from datetime import datetime
from datetime import datetime, UTC
import json
from typing import Tuple

from openshift_metrics import utils
from openshift_metrics.metrics_processor import MetricsProcessor
Expand All @@ -16,6 +17,20 @@ def compare_dates(date_str1, date_str2):
return date1 < date2


def parse_timestamp_range(timestamp_range: str) -> Tuple[datetime, datetime]:
try:
start_str, end_str = timestamp_range.split(",")
start_dt = datetime.fromisoformat(start_str).replace(tzinfo=UTC)
end_dt = datetime.fromisoformat(end_str).replace(tzinfo=UTC)

if start_dt > end_dt:
raise argparse.ArgumentTypeError("Ignore start time is after ignore end time")
return start_dt, end_dt
except ValueError:
raise argparse.ArgumentTypeError(
"Timestamp range must be in the format 'YYYY-MM-DDTHH:MM:SS,YYYY-MM-DDTHH:MM:SS'"
)

def main():
"""Reads the metrics from files and generates the reports"""
parser = argparse.ArgumentParser()
Expand All @@ -25,13 +40,21 @@ def main():
"--upload-to-s3",
action="store_true"
)
parser.add_argument(
"--ignore-hours",
knikolla marked this conversation as resolved.
Show resolved Hide resolved
type=parse_timestamp_range,
nargs="*",
help="List of timestamp ranges in UTC to ignore in the format 'YYYY-MM-DDTHH:MM:SS,YYYY-MM-DDTHH:MM:SS'"
)

args = parser.parse_args()
files = args.files

if args.output_file:
output_file = args.output_file
else:
output_file = f"{datetime.today().strftime('%Y-%m-%d')}.csv"
ignore_hours = args.ignore_hours

report_start_date = None
report_end_date = None
Expand Down Expand Up @@ -76,9 +99,10 @@ def main():
utils.write_metrics_by_namespace(
condensed_metrics_dict,
output_file,
report_month
report_month,
ignore_hours,
)
utils.write_metrics_by_pod(condensed_metrics_dict, "pod-" + output_file)
utils.write_metrics_by_pod(condensed_metrics_dict, "pod-" + output_file, ignore_hours)

if args.upload_to_s3:
primary_location = (
Expand Down
69 changes: 69 additions & 0 deletions openshift_metrics/tests/test_invoice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from unittest import TestCase
from datetime import datetime
from decimal import Decimal

from openshift_metrics import invoice


class TestPodGetRuntime(TestCase):
    """Checks ``Pod.get_runtime`` with and without ignored time ranges."""

    @staticmethod
    def _at(hour, minute=0):
        """Build a naive datetime on 2024-10-11 at ``hour:minute``."""
        return datetime(2024, 10, 11, hour, minute)

    def setUp(self):
        """Create a pod that starts at 12:00 and ends at 20:00 on 2024-10-11.

        The datetimes carry no tzinfo, so ``timestamp()`` interprets them in
        the local timezone; the pod and the ignore ranges in these tests are
        all built the same way.
        """
        eight_hours = 3600 * 8
        self.pod = invoice.Pod(
            pod_name="test-pod",
            namespace="test-namespace",
            start_time=int(self._at(12).timestamp()),
            duration=eight_hours,
            cpu_request=Decimal("1.0"),
            gpu_request=Decimal(0),
            memory_request=Decimal("4.0"),
            gpu_type=None,
            gpu_resource=None,
            node_hostname="node-1",
            node_model=None,
        )

    def test_no_ignore_times(self):
        self.assertEqual(self.pod.get_runtime(), Decimal("8.0"))

    def test_one_ignore_range(self):
        one_hour_window = [(self._at(13), self._at(14))]
        billable = self.pod.get_runtime(one_hour_window)
        self.assertEqual(billable, Decimal(7.0))

    def test_multiple_ignore_times(self):
        windows = [
            (self._at(13), self._at(14)),
            (self._at(14), self._at(15)),
            (self._at(19), self._at(20)),
        ]
        billable = self.pod.get_runtime(windows)
        self.assertEqual(billable, Decimal(5.0))

    def test_ignore_times_outside_runtime(self):
        before_start = (self._at(10), self._at(11))
        after_end = (self._at(20), self._at(22))
        billable = self.pod.get_runtime([before_start, after_end])
        self.assertEqual(billable, Decimal(8.0))

    def test_partial_overlap_ignore_range(self):
        # Starts before the pod does and ends 2.5 hours into its runtime.
        window = (self._at(10, 30), self._at(14, 30))
        billable = self.pod.get_runtime([window])
        self.assertEqual(billable, Decimal(5.5))

    def test_ignore_range_greater_than_pod_runtime(self):
        # Covers the pod's whole lifetime with an hour to spare on each side.
        window = (self._at(11, 00), self._at(21, 00))
        billable = self.pod.get_runtime([window])
        self.assertEqual(billable, Decimal(0))

    def test_runtime_is_never_negative(self):
        windows = [
            (self._at(13), self._at(17)),
            (self._at(13), self._at(17)),
            (self._at(10), self._at(22)),
        ]
        billable = self.pod.get_runtime(windows)
        self.assertEqual(billable, Decimal(0.0))
97 changes: 97 additions & 0 deletions openshift_metrics/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from openshift_metrics import utils, invoice
import os
from datetime import datetime, UTC

class TestGetNamespaceAnnotations(TestCase):

Expand Down Expand Up @@ -289,6 +290,102 @@ def test_write_metrics_by_namespace_decimal(self, mock_gna):
self.assertEqual(tmp.read(), expected_output)


class TestWriteMetricsWithIgnoreHours(TestCase):
    """Report-writer tests that pass ignore ranges through to the pod runtime."""

    def setUp(self):
        """Build condensed metrics and ignore ranges shared by both writer tests."""
        seconds_per_hour = 60 * 60
        gib = 2**30
        first_start = int(datetime.fromisoformat("2024-04-10T11:00:00Z").timestamp())
        second_start = first_start + 24 * seconds_per_hour

        self.ignore_times = [
            (
                datetime(2024, 4, 9, 11, 0, 0, tzinfo=UTC),
                datetime(2024, 4, 10, 15, 0, 0, tzinfo=UTC),
            ),
            (
                datetime(2024, 4, 10, 22, 0, 0, tzinfo=UTC),
                datetime(2024, 4, 11, 5, 0, 0, tzinfo=UTC),
            ),
        ]

        # 2024-04-10T11:00:00Z -> 2024-04-10T21:00:00Z: 2 SU * 6 billable hours
        pod1 = {
            "metrics": {
                first_start: {
                    "cpu_request": 2,
                    "memory_request": 4 * gib,
                    "duration": 10 * seconds_per_hour,
                },
            }
        }

        pod2 = {
            "metrics": {
                # 2024-04-10T11:00:00Z -> 2024-04-11T11:00:00Z: 2 SU * 13 billable hours
                first_start: {
                    "cpu_request": 2,
                    "memory_request": 4 * gib,
                    "duration": 24 * seconds_per_hour,
                },
                # 2024-04-11T11:00:00Z -> 2024-04-13T11:00:00Z: 3 SU * 48 billable hours
                second_start: {
                    "cpu_request": 3,
                    "memory_request": 4 * gib,
                    "duration": 48 * seconds_per_hour,
                },
            }
        }

        # 2024-04-10T11:00:00Z -> 2024-04-12T11:00:00Z: 1 SU * 37 billable hours
        pod3 = {
            "gpu_type": invoice.GPU_A100_SXM4,
            "metrics": {
                first_start: {
                    "cpu_request": 24,
                    "memory_request": 8 * gib,
                    "gpu_request": 1,
                    "gpu_type": invoice.GPU_A100_SXM4,
                    "gpu_resource": invoice.WHOLE_GPU,
                    "duration": 48 * seconds_per_hour,
                },
            },
        }

        self.test_metrics_dict = {
            "namespace1": {"pod1": pod1},
            "namespace2": {"pod2": pod2, "pod3": pod3},
        }

    @mock.patch("openshift_metrics.utils.get_namespace_attributes")
    def test_write_metrics_by_namespace_with_ignore_hours(self, mock_gna):
        namespace_attributes = {
            "namespace1": {
                "cf_pi": "PI1",
                "cf_project_id": "123",
                "institution_code": "cf-code-1",
            },
            "namespace2": {
                "cf_pi": "PI2",
                "cf_project_id": "456",
                "institution_code": "cf-code-2",
            },
        }
        mock_gna.return_value = namespace_attributes

        expected_lines = [
            "Invoice Month,Project - Allocation,Project - Allocation ID,Manager (PI),Invoice Email,Invoice Address,Institution,Institution - Specific Code,SU Hours (GBhr or SUhr),SU Type,Rate,Cost\n",
            "2023-01,namespace1,namespace1,PI1,,,,cf-code-1,12,OpenShift CPU,0.013,0.16\n",
            "2023-01,namespace2,namespace2,PI2,,,,cf-code-2,170,OpenShift CPU,0.013,2.21\n",
            "2023-01,namespace2,namespace2,PI2,,,,cf-code-2,37,OpenShift GPUA100SXM4,2.078,76.89\n",
        ]
        expected_output = "".join(expected_lines)

        with tempfile.NamedTemporaryFile(mode="w+") as tmp:
            utils.write_metrics_by_namespace(
                self.test_metrics_dict, tmp.name, "2023-01", self.ignore_times
            )
            written = tmp.read()
            self.assertEqual(written, expected_output)

    def test_write_metrics_by_pod_with_ignore_hours(self):
        expected_lines = [
            "Namespace,Pod Start Time,Pod End Time,Duration (Hours),Pod Name,CPU Request,GPU Request,GPU Type,GPU Resource,Node,Node Model,Memory Request (GiB),Determining Resource,SU Type,SU Count\n",
            "namespace1,2024-04-10T11:00:00,2024-04-10T21:00:00,6.0000,pod1,2,0,,,Unknown Node,Unknown Model,4.0000,CPU,OpenShift CPU,2\n",
            "namespace2,2024-04-10T11:00:00,2024-04-11T11:00:00,13.0000,pod2,2,0,,,Unknown Node,Unknown Model,4.0000,CPU,OpenShift CPU,2\n",
            "namespace2,2024-04-11T11:00:00,2024-04-13T11:00:00,48.0000,pod2,3,0,,,Unknown Node,Unknown Model,4.0000,CPU,OpenShift CPU,3\n",
            "namespace2,2024-04-10T11:00:00,2024-04-12T11:00:00,37.0000,pod3,24,1,NVIDIA-A100-SXM4-40GB,nvidia.com/gpu,Unknown Node,Unknown Model,8.0000,GPU,OpenShift GPUA100SXM4,1\n",
        ]
        expected_output = "".join(expected_lines)

        with tempfile.NamedTemporaryFile(mode="w+") as tmp:
            utils.write_metrics_by_pod(self.test_metrics_dict, tmp.name, self.ignore_times)
            written = tmp.read()
            self.assertEqual(written, expected_output)

class TestGetServiceUnit(TestCase):

def make_pod(
Expand Down
9 changes: 5 additions & 4 deletions openshift_metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def csv_writer(rows, file_name):
csvwriter.writerows(rows)


def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month, ignore_hours=None):
"""
Process metrics dictionary to aggregate usage by namespace and then write that to a file
"""
Expand Down Expand Up @@ -157,7 +157,8 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
invoice_address="",
intitution="",
institution_specific_code=cf_institution_code,
rates=rates
rates=rates,
ignore_hours=ignore_hours,
)
invoices[namespace] = project_invoice

Expand Down Expand Up @@ -186,7 +187,7 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
csv_writer(rows, file_name)


def write_metrics_by_pod(condensed_metrics_dict, file_name):
def write_metrics_by_pod(condensed_metrics_dict, file_name, ignore_hours=None):
"""
Generates metrics report by pod.
"""
Expand Down Expand Up @@ -227,6 +228,6 @@ def write_metrics_by_pod(condensed_metrics_dict, file_name):
node_hostname=pod_metric_dict.get("node", "Unknown Node"),
node_model=pod_metric_dict.get("node_model", "Unknown Model"),
)
rows.append(pod_obj.generate_pod_row())
rows.append(pod_obj.generate_pod_row(ignore_hours))

csv_writer(rows, file_name)
Loading