Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include rate cost #9

Merged
merged 2 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions openshift_metrics/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,12 +467,12 @@ def test_write_metrics_log(self, mock_gna):
}

expected_output = ("Namespace,Coldfront_PI Name,Coldfront Project ID ,Pod Start Time,Pod End Time,Duration (Hours),Pod Name,CPU Request,GPU Request,GPU Type,Memory Request (GiB),Determining Resource,SU Type,SU Count\n"
"namespace1,PI1,123,1970-01-01T00:00:00,1970-01-01T00:02:00,0.0333,pod1,10,0,No GPU,0.001,CPU,SU_CPU,10\n"
"namespace1,PI1,123,1970-01-01T00:02:00,1970-01-01T00:03:00,0.0167,pod1,20,0,No GPU,0.001,CPU,SU_CPU,20\n"
"namespace1,PI1,123,1970-01-01T00:00:00,1970-01-01T00:01:00,0.0167,pod2,20,0,No GPU,0.0098,CPU,SU_CPU,20\n"
"namespace1,PI1,123,1970-01-01T00:01:00,1970-01-01T00:02:00,0.0167,pod2,25,0,No GPU,0.0098,CPU,SU_CPU,25\n"
"namespace1,PI1,123,1970-01-01T00:02:00,1970-01-01T00:03:00,0.0167,pod2,20,0,No GPU,0.0098,CPU,SU_CPU,20\n"
"namespace2,PI2,456,1970-01-01T00:00:00,1970-01-01T00:03:00,0.05,pod3,45,0,No GPU,0.0977,CPU,SU_CPU,45\n")
"namespace1,PI1,123,1970-01-01T00:00:00,1970-01-01T00:02:00,0.0333,pod1,10,0,No GPU,0.001,CPU,OpenShift CPU,10\n"
"namespace1,PI1,123,1970-01-01T00:02:00,1970-01-01T00:03:00,0.0167,pod1,20,0,No GPU,0.001,CPU,OpenShift CPU,20\n"
"namespace1,PI1,123,1970-01-01T00:00:00,1970-01-01T00:01:00,0.0167,pod2,20,0,No GPU,0.0098,CPU,OpenShift CPU,20\n"
"namespace1,PI1,123,1970-01-01T00:01:00,1970-01-01T00:02:00,0.0167,pod2,25,0,No GPU,0.0098,CPU,OpenShift CPU,25\n"
"namespace1,PI1,123,1970-01-01T00:02:00,1970-01-01T00:03:00,0.0167,pod2,20,0,No GPU,0.0098,CPU,OpenShift CPU,20\n"
"namespace2,PI2,456,1970-01-01T00:00:00,1970-01-01T00:03:00,0.05,pod3,45,0,No GPU,0.0977,CPU,OpenShift CPU,45\n")

tmp_file_name = "%s/test-metrics-%s.log" % (tempfile.gettempdir(), time.time())
utils.write_metrics_by_pod(test_metrics_dict, tmp_file_name)
Expand Down Expand Up @@ -552,7 +552,7 @@ def test_write_metrics_log(self, mock_gna):
},
"pod5": {
"namespace": "namespace2",
"gpu_type": utils.GPU_A10,
"gpu_type": utils.GPU_A2,
"metrics": {
0: {
"cpu_request": 24,
Expand All @@ -565,10 +565,10 @@ def test_write_metrics_log(self, mock_gna):
}

expected_output = ("Invoice Month,Project - Allocation,Project - Allocation ID,Manager (PI),Invoice Email,Invoice Address,Institution,Institution - Specific Code,SU Hours (GBhr or SUhr),SU Type,Rate,Cost\n"
"2023-01,namespace1,namespace1,PI1,,,,,1128,SU_CPU,,\n"
"2023-01,namespace2,namespace2,PI2,,,,,96,SU_CPU,,\n"
"2023-01,namespace2,namespace2,PI2,,,,,48.0,SU_A100_GPU,,\n"
"2023-01,namespace2,namespace2,PI2,,,,,144.0,SU_A10_GPU,,\n")
"2023-01,namespace1,namespace1,PI1,,,,,1128,OpenShift CPU,0.013,14.664\n"
"2023-01,namespace2,namespace2,PI2,,,,,96,OpenShift CPU,0.013,1.248\n"
"2023-01,namespace2,namespace2,PI2,,,,,48.0,OpenShift GPUA100,1.803,86.544\n"
"2023-01,namespace2,namespace2,PI2,,,,,144.0,OpenShift GPUA2,0.466,67.104\n")

tmp_file_name = "%s/test-metrics-%s.log" % (tempfile.gettempdir(), time.time())
utils.write_metrics_by_namespace(test_metrics_dict, tmp_file_name, "2023-01")
Expand Down
72 changes: 41 additions & 31 deletions openshift_metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,26 @@

# GPU types
# Resource-name strings that the reporting code compares `gpu_type` against and
# uses as keys of `known_gpu_su` in get_service_unit().
# NOTE(review): this listing is a diff rendered without +/- markers, so
# pre-change and post-change lines appear side by side; presumably GPU_A10 and
# GPU_MOC are the pre-change names and GPU_A2 / GPU_V100 / GPU_GENERIC the
# post-change ones -- confirm against the repository.
GPU_A100 = "nvidia.com/gpu_A100"
GPU_A10 = "nvidia.com/gpu_A10"
GPU_MOC = "nvidia.com/gpu"
GPU_A2 = "nvidia.com/gpu_A2"
GPU_V100 = "nvidia.com/gpu_V100"
# Same string value as GPU_MOC above ("nvidia.com/gpu").
GPU_GENERIC = "nvidia.com/gpu"
NO_GPU = "No GPU"

# SU Types
# Service-unit type labels; these strings are written verbatim into the
# "SU Type" column of the generated reports.
# Several names below are assigned twice; read top to bottom, the later
# assignment is the one that takes effect (e.g. SU_CPU ends up "OpenShift CPU").
# NOTE(review): SU_MOC_GPU is still referenced by the V100 row built in
# write_metrics_by_namespace(); if the SU_MOC_GPU definition is dropped, that
# reference must become SU_V100_GPU or the row will raise NameError.
SU_CPU = "SU_CPU"
SU_A100_GPU = "SU_A100_GPU"
SU_A10_GPU = "SU_A10_GPU"
SU_MOC_GPU = "SU_MOC_GPU"
SU_UNKNOWN_GPU = "SU_UNKNOWN_GPU"
SU_UNKNOWN = "SU_UNKNOWN"
SU_CPU = "OpenShift CPU"
SU_A100_GPU = "OpenShift GPUA100"
SU_A2_GPU = "OpenShift GPUA2"
SU_V100_GPU = "OpenShift GPUV100"
SU_UNKNOWN_GPU = "OpenShift Unknown GPU"
# NOTE(review): spelled "Openshift" here but "OpenShift" in every other label.
SU_UNKNOWN = "Openshift Unknown"

# Rate per SU type, keyed by the SU type label. write_metrics_by_namespace()
# fills the "Rate" column with str(RATE.get(<su>)) and the "Cost" column with
# RATE.get(<su>) * <SU hours>.
# NOTE(review): there is no entry for SU_UNKNOWN; RATE.get(SU_UNKNOWN) would
# return None, and multiplying None by the hours would raise TypeError if a
# caller ever used it that way.
# NOTE(review): currency and unit are not stated here -- presumably cost per
# SU hour; confirm.
RATE = {
SU_CPU: 0.013,
SU_A100_GPU: 1.803,
SU_A2_GPU: 0.466,
SU_V100_GPU: 0.902,
SU_UNKNOWN_GPU: 0,
}

# NOTE(review): usage is not visible in this excerpt -- presumably the query
# step size in minutes; confirm.
STEP_MIN = 15

Expand Down Expand Up @@ -97,18 +106,19 @@ def get_service_unit(cpu_count, memory_count, gpu_count, gpu_type):

known_gpu_su = {
GPU_A100: SU_A100_GPU,
GPU_A10: SU_A10_GPU,
GPU_MOC: SU_MOC_GPU,
GPU_A2: SU_A2_GPU,
GPU_V100: SU_V100_GPU,
GPU_GENERIC: SU_UNKNOWN_GPU,
}

# GPU count for some configs is -1 for math reasons, in reality it is 0
su_config = {
SU_CPU: {"gpu": -1, "cpu": 1, "ram": 4},
SU_A100_GPU: {"gpu": 1, "cpu": 24, "ram": 96},
SU_A10_GPU: {"gpu": 1, "cpu": 8, "ram": 64},
SU_V100_GPU: {"gpu": 1, "cpu": 24, "ram": 96},
SU_A2_GPU: {"gpu": 1, "cpu": 8, "ram": 64},
SU_UNKNOWN_GPU: {"gpu": 1, "cpu": 8, "ram": 64},
SU_UNKNOWN: {"gpu": -1, "cpu": 1, "ram": 1},
SU_MOC_GPU: {"gpu": 1, "cpu": 24, "ram": 128},
}

if gpu_type is None and gpu_count == 0:
Expand Down Expand Up @@ -251,8 +261,8 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
"_memory_hours": 0,
"SU_CPU_HOURS": 0,
"SU_A100_GPU_HOURS": 0,
"SU_A10_GPU_HOURS": 0,
"SU_MOC_GPU_HOURS": 0,
"SU_A2_GPU_HOURS": 0,
"SU_V100_GPU_HOURS": 0,
"total_cost": 0,
}

Expand All @@ -267,16 +277,16 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
float(cpu_request), memory_request, float(gpu_request), gpu_type
)
metrics_by_namespace[namespace]["SU_A100_GPU_HOURS"] += su_count * duration_in_hours
elif gpu_type == GPU_A10:
elif gpu_type == GPU_A2:
_, su_count, _ = get_service_unit(
float(cpu_request), memory_request, float(gpu_request), gpu_type
)
metrics_by_namespace[namespace]["SU_A10_GPU_HOURS"] += su_count * duration_in_hours
elif gpu_type == GPU_MOC:
metrics_by_namespace[namespace]["SU_A2_GPU_HOURS"] += su_count * duration_in_hours
elif gpu_type == GPU_GENERIC:
_, su_count, _ = get_service_unit(
float(cpu_request), memory_request, float(gpu_request), gpu_type
)
metrics_by_namespace[namespace]["SU_MOC_GPU_HOURS"] += su_count * duration_in_hours
metrics_by_namespace[namespace]["SU_V100_GPU_HOURS"] += su_count * duration_in_hours
else:
metrics_by_namespace[namespace]["_cpu_hours"] += cpu_request * duration_in_hours
metrics_by_namespace[namespace]["_memory_hours"] += (
Expand All @@ -302,8 +312,8 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
"", #Institution - Specific Code
str(metrics["SU_CPU_HOURS"]),
SU_CPU,
"", #Rate
"" #Cost
str(RATE.get(SU_CPU)),
str(RATE.get(SU_CPU) * metrics["SU_CPU_HOURS"])
]
rows.append(row)

Expand All @@ -319,12 +329,12 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
"", #Institution - Specific Code
str(metrics["SU_A100_GPU_HOURS"]),
SU_A100_GPU,
"", #Rate
"" #Cost
str(RATE.get(SU_A100_GPU)),
str(RATE.get(SU_A100_GPU) * metrics["SU_A100_GPU_HOURS"])
]
rows.append(row)

if metrics["SU_A10_GPU_HOURS"] != 0:
if metrics["SU_A2_GPU_HOURS"] != 0:
row = [
report_month,
namespace,
Expand All @@ -334,14 +344,14 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
"", #Invoice Address
"", #Institution
"", #Institution - Specific Code
str(metrics["SU_A10_GPU_HOURS"]),
SU_A10_GPU,
"", #Rate
"" #Cost
str(metrics["SU_A2_GPU_HOURS"]),
SU_A2_GPU,
str(RATE.get(SU_A2_GPU)),
str(RATE.get(SU_A2_GPU) * metrics["SU_A2_GPU_HOURS"])
]
rows.append(row)

if metrics["SU_MOC_GPU_HOURS"] != 0:
if metrics["SU_V100_GPU_HOURS"] != 0:
row = [
report_month,
namespace,
Expand All @@ -351,10 +361,10 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month):
"", #Invoice Address
"", #Institution
"", #Institution - Specific Code
str(metrics["SU_MOC_GPU_HOURS"]),
str(metrics["SU_V100_GPU_HOURS"]),
SU_MOC_GPU,
"", #Rate
"" #Cost
str(RATE.get(SU_V100_GPU)),
str(RATE.get(SU_V100_GPU) * metrics["SU_V100_GPU_HOURS"]) #Cost
]
rows.append(row)

Expand Down