CCI-MOC · naved001 · Oct 25, 2024 · Oct 16, 2024 · Oct 21, 2024 · Oct 22, 2024
diff --git a/openshift_metrics/invoice.py b/openshift_metrics/invoice.py
@@ -1,7 +1,7 @@
 import math
 from dataclasses import dataclass, field
 from collections import namedtuple
-from typing import List
+from typing import List, Tuple, Optional
 from decimal import Decimal, ROUND_HALF_UP
 import datetime
 
@@ -112,15 +112,32 @@ def get_service_unit(self) -> ServiceUnit:
 
         return ServiceUnit(su_type, su_count, determining_resource)
 
-    def get_runtime(self) -> Decimal:
-        """Return runtime eligible for billing in hours"""
-        return Decimal(self.duration) / 3600
+    def get_runtime(
+        self,
+        ignore_times: Optional[
+            List[Tuple[datetime.datetime, datetime.datetime]]
+        ] = None,
+    ) -> Decimal:
+        """Return the runtime eligible for billing, in hours.
+
+        Args:
+            ignore_times: Optional ``(start, end)`` datetime pairs marking
+                windows that must not be billed. Each bound is converted to
+                epoch seconds with ``datetime.timestamp()`` and truncated to
+                whole seconds, so naive datetimes are read as local time.
+
+        Returns:
+            The pod's duration in hours minus the time it spent inside the
+            ignore windows. Windows that overlap one another are merged, so
+            shared time is subtracted only once and the result cannot drop
+            below zero for a pod with a non-negative duration.
+        """
+        pod_start = self.start_time
+        pod_end = self.end_time
+
+        # Clip every window to the pod's lifetime; windows that miss the pod
+        # entirely (or are empty/inverted) clip to nothing and are dropped.
+        clipped = []
+        for ignore_start_date, ignore_end_date in ignore_times or ():
+            window_start = max(pod_start, int(ignore_start_date.timestamp()))
+            window_end = min(pod_end, int(ignore_end_date.timestamp()))
+            if window_end > window_start:
+                clipped.append((window_start, window_end))
+
+        # Sweep the windows in start order and count only the part of each one
+        # that extends past what has already been counted. This is what keeps
+        # overlapping or duplicated windows from being subtracted twice.
+        ignored_seconds = 0
+        covered_until = pod_start
+        for window_start, window_end in sorted(clipped):
+            if window_end > covered_until:
+                ignored_seconds += window_end - max(window_start, covered_until)
+                covered_until = window_end
+
+        return Decimal(self.duration - ignored_seconds) / 3600
 
     @property
     def end_time(self) -> int:
+        """End of the pod's run: ``start_time`` plus ``duration``."""
         return self.start_time + self.duration
 
-    def generate_pod_row(self):
+    def generate_pod_row(self, ignore_times):
         """
         This returns a row to represent pod data.
         It converts the epoch_time stamps to datetime timestamps so it's more readable.
@@ -136,7 +153,7 @@ def generate_pod_row(self):
         memory_request = self.memory_request.quantize(
             Decimal(".0001"), rounding=ROUND_HALF_UP
         )
-        runtime = self.get_runtime().quantize(Decimal(".0001"), rounding=ROUND_HALF_UP)
+        runtime = self.get_runtime(ignore_times).quantize(Decimal(".0001"), rounding=ROUND_HALF_UP)
         return [
             self.namespace,
             start_time,
@@ -177,6 +194,7 @@ class ProjectInvoce:
     intitution: str
     institution_specific_code: str
     rates: Rates
+    ignore_hours: Optional[List[Tuple[datetime.datetime, datetime.datetime]]] = None
     su_hours: dict = field(
         default_factory=lambda: {
             SU_CPU: 0,
@@ -192,7 +210,7 @@ class ProjectInvoce:
     def add_pod(self, pod: Pod) -> None:
         """Aggregate a pods data"""
         su_type, su_count, _ = pod.get_service_unit()
-        duration_in_hours = pod.get_runtime()
+        # Pass this invoice's ignore windows so the pod's runtime excludes them
+        # before it is multiplied into SU hours.
+        duration_in_hours = pod.get_runtime(self.ignore_hours)
         self.su_hours[su_type] += su_count * duration_in_hours
 
     def get_rate(self, su_type) -> Decimal:

diff --git a/openshift_metrics/merge.py b/openshift_metrics/merge.py
@@ -3,8 +3,9 @@
 """
 
 import argparse
-from datetime import datetime
+from datetime import datetime, UTC
 import json
+from typing import Tuple
 
 from openshift_metrics import utils
 from openshift_metrics.metrics_processor import MetricsProcessor
@@ -16,6 +17,20 @@ def compare_dates(date_str1, date_str2):
     return date1 < date2
 
 
+def parse_timestamp_range(timestamp_range: str) -> Tuple[datetime, datetime]:
+    """Parse a ``start,end`` pair of ISO 8601 timestamps into UTC datetimes.
+
+    Raises ``argparse.ArgumentTypeError`` on bad input so it can serve as an
+    ``argparse`` ``type=`` callable.
+
+    Args:
+        timestamp_range: Two timestamps separated by a single comma, e.g.
+            ``2024-10-11T13:00:00,2024-10-11T14:00:00``. Whitespace around
+            either timestamp is ignored. A timestamp without a UTC offset is
+            taken to be in UTC; one with an offset is converted to UTC.
+
+    Returns:
+        A ``(start, end)`` tuple of timezone-aware datetimes in UTC.
+
+    Raises:
+        argparse.ArgumentTypeError: If the value is not two comma-separated
+            ISO timestamps, or if the start is after the end.
+    """
+
+    def to_utc(text: str) -> datetime:
+        parsed = datetime.fromisoformat(text.strip())
+        if parsed.tzinfo is None:
+            return parsed.replace(tzinfo=UTC)
+        # Convert rather than overwrite the offset, so a timestamp given with
+        # an explicit offset still refers to the same instant.
+        return parsed.astimezone(UTC)
+
+    try:
+        start_str, end_str = timestamp_range.split(",")
+        start_dt = to_utc(start_str)
+        end_dt = to_utc(end_str)
+    except ValueError:
+        # Covers both a wrong number of comma-separated parts and timestamps
+        # that fromisoformat() cannot parse; the original traceback adds
+        # nothing to the user-facing message, so it is suppressed.
+        raise argparse.ArgumentTypeError(
+            "Timestamp range must be in the format 'YYYY-MM-DDTHH:MM:SS,YYYY-MM-DDTHH:MM:SS'"
+        ) from None
+
+    if start_dt > end_dt:
+        raise argparse.ArgumentTypeError("Ignore start time is after ignore end time")
+    return start_dt, end_dt
+
 def main():
     """Reads the metrics from files and generates the reports"""
     parser = argparse.ArgumentParser()
@@ -25,13 +40,21 @@ def main():
         "--upload-to-s3",
         action="store_true"
     )
+    parser.add_argument(
+        "--ignore-hours",
+        type=parse_timestamp_range,
+        nargs="*",
+        help="List of timestamp ranges in UTC to ignore in the format 'YYYY-MM-DDTHH:MM:SS,YYYY-MM-DDTHH:MM:SS'"
+    )
+
     args = parser.parse_args()
     files = args.files
 
     if args.output_file:
         output_file = args.output_file
     else:
         output_file = f"{datetime.today().strftime('%Y-%m-%d')}.csv"
+    ignore_hours = args.ignore_hours
 
     report_start_date = None
     report_end_date = None
@@ -76,9 +99,10 @@ def main():
     utils.write_metrics_by_namespace(
         condensed_metrics_dict,
         output_file,
-        report_month
+        report_month,
+        ignore_hours,
     )
-    utils.write_metrics_by_pod(condensed_metrics_dict, "pod-" + output_file)
+    utils.write_metrics_by_pod(condensed_metrics_dict, "pod-" + output_file, ignore_hours)
 
     if args.upload_to_s3:
         primary_location = (

diff --git a/openshift_metrics/tests/test_invoice.py b/openshift_metrics/tests/test_invoice.py
@@ -0,0 +1,69 @@
+from unittest import TestCase
+from datetime import datetime
+from decimal import Decimal
+
+from openshift_metrics import invoice
+
+
+class TestPodGetRuntime(TestCase):
+    """Exercise ``Pod.get_runtime`` against different sets of ignore windows."""
+
+    @staticmethod
+    def _at(hour, minute=0):
+        """Return a naive datetime on 2024-10-11 at the given time of day."""
+        return datetime(2024, 10, 11, hour, minute)
+
+    def _assert_runtime(self, windows, expected_hours):
+        """Assert the pod's billable runtime for the given ignore windows."""
+        self.assertEqual(self.pod.get_runtime(windows), Decimal(expected_hours))
+
+    def setUp(self):
+        """Build a pod that runs for 8 hours, from 12:00 to 20:00 on 2024-10-11.
+
+        Every datetime in this class is naive, so ``timestamp()`` interprets it
+        in the local timezone; the pod and the ignore windows share that clock.
+        """
+        self.pod = invoice.Pod(
+            pod_name="test-pod",
+            namespace="test-namespace",
+            start_time=int(self._at(12).timestamp()),
+            duration=3600 * 8,
+            cpu_request=Decimal("1.0"),
+            gpu_request=Decimal(0),
+            memory_request=Decimal("4.0"),
+            gpu_type=None,
+            gpu_resource=None,
+            node_hostname="node-1",
+            node_model=None,
+        )
+
+    def test_no_ignore_times(self):
+        self.assertEqual(self.pod.get_runtime(), Decimal("8.0"))
+
+    def test_one_ignore_range(self):
+        self._assert_runtime([(self._at(13), self._at(14))], "7")
+
+    def test_multiple_ignore_times(self):
+        windows = [
+            (self._at(13), self._at(14)),
+            (self._at(14), self._at(15)),
+            (self._at(19), self._at(20)),
+        ]
+        self._assert_runtime(windows, "5")
+
+    def test_ignore_times_outside_runtime(self):
+        windows = [
+            (self._at(10), self._at(11)),  # before start
+            (self._at(20), self._at(22)),  # after end
+        ]
+        self._assert_runtime(windows, "8")
+
+    def test_partial_overlap_ignore_range(self):
+        # Only 12:00-14:30 of this window falls inside the pod's run.
+        self._assert_runtime([(self._at(10, 30), self._at(14, 30))], "5.5")
+
+    def test_ignore_range_greater_than_pod_runtime(self):
+        self._assert_runtime([(self._at(11), self._at(21))], "0")
+
+    def test_runtime_is_never_negative(self):
+        # Duplicate windows plus one that covers the whole run.
+        windows = [
+            (self._at(13), self._at(17)),
+            (self._at(13), self._at(17)),
+            (self._at(10), self._at(22)),
+        ]
+        self._assert_runtime(windows, "0")
diff --git a/openshift_metrics/tests/test_utils.py b/openshift_metrics/tests/test_utils.py
@@ -16,6 +16,7 @@
 
 from openshift_metrics import utils, invoice
 import os
+from datetime import datetime, UTC
 
 class TestGetNamespaceAnnotations(TestCase):
 
@@ -289,6 +290,102 @@ def test_write_metrics_by_namespace_decimal(self, mock_gna):
             self.assertEqual(tmp.read(), expected_output)
 
 
+class TestWriteMetricsWithIgnoreHours(TestCase):
+    """Check both CSV writers when a list of ignore windows is passed in."""
+
+    def setUp(self):
+        """Creates a test dictionary with condensed data that can be used to test WriteMetricsByPod and WriteMetricsByNamespace"""
+        # NOTE(review): parsing a trailing "Z" with fromisoformat() and importing
+        # datetime.UTC both need Python 3.11+ - confirm the supported versions.
+        start_dt = int(datetime.fromisoformat("2024-04-10T11:00:00Z").timestamp())
+        self.ignore_times = [
+            # Ends 4 hours after start_dt, so it covers the first 4 hours of
+            # every record below that starts at start_dt.
+            (
+                datetime(2024, 4, 9, 11, 0, 0, tzinfo=UTC),
+                datetime(2024, 4, 10, 15, 0, 0, tzinfo=UTC),
+            ),
+            # A 7-hour window that begins one hour after pod1 has ended.
+            (
+                datetime(2024, 4, 10, 22, 0, 0, tzinfo=UTC),
+                datetime(2024, 4, 11, 5, 0, 0, tzinfo=UTC)
+            ),
+        ]
+        HOUR = 60 * 60
+        self.test_metrics_dict = {
+            "namespace1": {
+                "pod1": {  # runs from 2024-04-10T11:00:00Z to 2024-04-10T21:00:00Z - 2 SU * 6 billable hours
+                    "metrics": {
+                        start_dt: {
+                            "cpu_request": 2,
+                            "memory_request": 4 * 2**30,
+                            "duration": 10 * HOUR,
+                        },
+                    }
+                },
+            },
+            "namespace2": {
+                "pod2": {
+                    "metrics": {
+                        start_dt: {  # runs from 2024-04-10T11:00:00Z to 2024-04-11T11:00:00Z - 2 SU * 13 billable hours
+                            "cpu_request": 2,
+                            "memory_request": 4 * 2**30,
+                            "duration": 24 * HOUR,
+                        },
+                        start_dt + 24 * HOUR: {  # runs from 2024-04-11T11:00:00Z to 2024-04-13T11:00:00Z - 3 SU * 48 billable hours
+                            "cpu_request": 3,
+                            "memory_request": 4 * 2**30,
+                            "duration": 48 * HOUR,
+                        },
+                    }
+                },
+                "pod3": {  # runs from 2024-04-10T11:00:00Z to 2024-04-12T11:00:00Z - 1 SU * 37 billable hours
+                    "gpu_type": invoice.GPU_A100_SXM4,
+                    "metrics": {
+                        start_dt: {
+                            "cpu_request": 24,
+                            "memory_request": 8 * 2**30,
+                            "gpu_request": 1,
+                            "gpu_type": invoice.GPU_A100_SXM4,
+                            "gpu_resource": invoice.WHOLE_GPU,
+                            "duration": 48 * HOUR,
+                        },
+                    },
+                },
+            },
+        }
+
+    @mock.patch("openshift_metrics.utils.get_namespace_attributes")
+    def test_write_metrics_by_namespace_with_ignore_hours(self, mock_gna):
+        mock_gna.return_value = {
+            "namespace1": {
+                "cf_pi": "PI1",
+                "cf_project_id": "123",
+                "institution_code": "cf-code-1",
+            },
+            "namespace2": {
+                "cf_pi": "PI2",
+                "cf_project_id": "456",
+                "institution_code": "cf-code-2",
+            },
+        }
+        # Expected SU hours, from the per-record figures noted in setUp:
+        #   namespace1 CPU: 2 * 6           = 12
+        #   namespace2 CPU: 2 * 13 + 3 * 48 = 170
+        #   namespace2 GPU: 1 * 37          = 37
+        expected_output = (
+            "Invoice Month,Project - Allocation,Project - Allocation ID,Manager (PI),Invoice Email,Invoice Address,Institution,Institution - Specific Code,SU Hours (GBhr or SUhr),SU Type,Rate,Cost\n"
+            "2023-01,namespace1,namespace1,PI1,,,,cf-code-1,12,OpenShift CPU,0.013,0.16\n"
+            "2023-01,namespace2,namespace2,PI2,,,,cf-code-2,170,OpenShift CPU,0.013,2.21\n"
+            "2023-01,namespace2,namespace2,PI2,,,,cf-code-2,37,OpenShift GPUA100SXM4,2.078,76.89\n"
+        )
+
+        with tempfile.NamedTemporaryFile(mode="w+") as tmp:
+            utils.write_metrics_by_namespace(
+                self.test_metrics_dict, tmp.name, "2023-01", self.ignore_times
+            )
+            self.assertEqual(tmp.read(), expected_output)
+
+    def test_write_metrics_by_pod_with_ignore_hours(self):
+        # The "Duration (Hours)" column is expected to hold the billable hours
+        # (6, 13, 48, 37), not the raw durations (10, 24, 48, 48).
+        expected_output = ("Namespace,Pod Start Time,Pod End Time,Duration (Hours),Pod Name,CPU Request,GPU Request,GPU Type,GPU Resource,Node,Node Model,Memory Request (GiB),Determining Resource,SU Type,SU Count\n"
+                           "namespace1,2024-04-10T11:00:00,2024-04-10T21:00:00,6.0000,pod1,2,0,,,Unknown Node,Unknown Model,4.0000,CPU,OpenShift CPU,2\n"
+                           "namespace2,2024-04-10T11:00:00,2024-04-11T11:00:00,13.0000,pod2,2,0,,,Unknown Node,Unknown Model,4.0000,CPU,OpenShift CPU,2\n"
+                           "namespace2,2024-04-11T11:00:00,2024-04-13T11:00:00,48.0000,pod2,3,0,,,Unknown Node,Unknown Model,4.0000,CPU,OpenShift CPU,3\n"
+                           "namespace2,2024-04-10T11:00:00,2024-04-12T11:00:00,37.0000,pod3,24,1,NVIDIA-A100-SXM4-40GB,nvidia.com/gpu,Unknown Node,Unknown Model,8.0000,GPU,OpenShift GPUA100SXM4,1\n")
+
+        with tempfile.NamedTemporaryFile(mode="w+") as tmp:
+            utils.write_metrics_by_pod(self.test_metrics_dict, tmp.name, self.ignore_times)
+            self.assertEqual(tmp.read(), expected_output)
+
 class TestGetServiceUnit(TestCase):
 
     def make_pod(

diff --git a/openshift_metrics/utils.py b/openshift_metrics/utils.py
@@ -110,7 +110,7 @@ def csv_writer(rows, file_name):
         csvwriter.writerows(rows)
 
 
-def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
+def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month, ignore_hours=None):
     """
     Process metrics dictionary to aggregate usage by namespace and then write that to a file
     """
@@ -157,7 +157,8 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
                 invoice_address="",
                 intitution="",
                 institution_specific_code=cf_institution_code,
-                rates=rates
+                rates=rates,
+                ignore_hours=ignore_hours,
             )
             invoices[namespace] = project_invoice
 
@@ -186,7 +187,7 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
     csv_writer(rows, file_name)
 
 
-def write_metrics_by_pod(condensed_metrics_dict, file_name):
+def write_metrics_by_pod(condensed_metrics_dict, file_name, ignore_hours=None):
     """
     Generates metrics report by pod.
     """
@@ -227,6 +228,6 @@ def write_metrics_by_pod(condensed_metrics_dict, file_name):
                     node_hostname=pod_metric_dict.get("node", "Unknown Node"),
                     node_model=pod_metric_dict.get("node_model", "Unknown Model"),
                 )
-                rows.append(pod_obj.generate_pod_row())
+                rows.append(pod_obj.generate_pod_row(ignore_hours))
 
     csv_writer(rows, file_name)