# Patch summary (reviewer commentary placed ahead of the first "diff --git" header).
#
# Dockerfile
#   Adds COPY steps that place bin/collect_metrics.sh and bin/produce_report.sh
#   at /app/, and changes CMD from the direct python invocation to
#   ["./collect_metrics.sh"].
# bin/collect_metrics.sh (new, mode 100755)
#   Runs `python -m openshift_metrics.openshift_prometheus_metrics` with
#   --output-file /tmp/metrics.json and --upload-to-s3.
# bin/produce_report.sh (new, mode 100755)
#   Runs `python -m openshift_metrics.merge /data/*.json` with
#   --invoice-file /tmp/invoice.csv, --pod-report-file /tmp/pod-report.csv
#   and --upload-to-s3.
# k8s/base/daily-openshift-metrics-collector-cronjob.yaml
#   Removes the data-volume volumeMount and its emptyDir volume, and replaces
#   the inline `/bin/sh -c "cd /data && python ..."` command with
#   ["./collect_metrics.sh"].
# k8s/base/gpu-node-map-configmap.yaml (new)
#   Defines ConfigMap "gpu-node-map" with a single key, gpu_node_map.json.
#   The JSON appears to map worker node names (wrk-NN) to GPU model names.
# k8s/base/kustomization.yaml
#   Adds gpu-node-map-configmap.yaml to the resources list.
# k8s/base/produce-report-cronjob.yaml
#   Mounts the gpu-node-map ConfigMap at /app/gpu_node_map.json using subPath,
#   adds the matching configMap volume, and replaces the inline shell command
#   with ["./produce_report.sh"].
#
# NOTE(review): both new command/CMD values are relative paths ("./..."); they
#   presumably depend on the image working directory being /app, which is not
#   visible in this patch - confirm.
# NOTE(review): the removed commands ran `cd /data` first; the new scripts do
#   not change directory. Whether the python modules depend on the working
#   directory cannot be determined from this patch - verify.
# NOTE(review): line breaks and indentation appear to have been collapsed in
#   this copy of the patch, so it is unlikely to apply as-is. Regenerate it from
#   the original commit rather than re-indenting the YAML hunks by hand.
diff --git a/Dockerfile b/Dockerfile index c2ff164..2be712c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,5 +5,7 @@ COPY requirements.txt ./ RUN pip install -r requirements.txt COPY openshift_metrics/ /app/openshift_metrics +COPY bin/collect_metrics.sh /app/collect_metrics.sh +COPY bin/produce_report.sh /app/produce_report.sh -CMD ["python", "openshift_metrics/openshift_prometheus_metrics.py", "--upload-to-s3"] +CMD ["./collect_metrics.sh"] diff --git a/bin/collect_metrics.sh b/bin/collect_metrics.sh new file mode 100755 index 0000000..cf6cb34 --- /dev/null +++ b/bin/collect_metrics.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +python -m openshift_metrics.openshift_prometheus_metrics \ + --output-file /tmp/metrics.json \ + --upload-to-s3 diff --git a/bin/produce_report.sh b/bin/produce_report.sh new file mode 100755 index 0000000..dfc5d91 --- /dev/null +++ b/bin/produce_report.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env sh + +python -m openshift_metrics.merge /data/*.json \ + --invoice-file /tmp/invoice.csv \ + --pod-report-file /tmp/pod-report.csv \ + --upload-to-s3 diff --git a/k8s/base/daily-openshift-metrics-collector-cronjob.yaml b/k8s/base/daily-openshift-metrics-collector-cronjob.yaml index 2951e5d..e7da03c 100644 --- a/k8s/base/daily-openshift-metrics-collector-cronjob.yaml +++ b/k8s/base/daily-openshift-metrics-collector-cronjob.yaml @@ -29,11 +29,5 @@ spec: secretKeyRef: name: openshift-metrics-b2-bucket key: secret-access-key - volumeMounts: - - name: data-volume - mountPath: /data - command: ["/bin/sh", "-c", "cd /data && python /app/openshift_metrics/openshift_prometheus_metrics.py --upload-to-s3"] - volumes: - - name: data-volume - emptyDir: {} + command: ["./collect_metrics.sh"] restartPolicy: OnFailure diff --git a/k8s/base/gpu-node-map-configmap.yaml b/k8s/base/gpu-node-map-configmap.yaml new file mode 100644 index 0000000..b841af3 --- /dev/null +++ b/k8s/base/gpu-node-map-configmap.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: 
gpu-node-map +data: + gpu_node_map.json: | + { + "wrk-88": "Tesla-V100-PCIE-32GB", + "wrk-89": "Tesla-V100-PCIE-32GB", + "wrk-94": "NVIDIA-A100-SXM4-40GB", + "wrk-95": "NVIDIA-A100-SXM4-40GB", + "wrk-97": "NVIDIA-A100-SXM4-40GB", + "wrk-98": "NVIDIA-A100-SXM4-40GB", + "wrk-99": "NVIDIA-A100-SXM4-40GB", + "wrk-102": "Tesla-V100-PCIE-32GB", + "wrk-103": "Tesla-V100-PCIE-32GB", + "wrk-104": "Tesla-V100-PCIE-32GB", + "wrk-105": "Tesla-V100-PCIE-32GB", + "wrk-106": "Tesla-V100-PCIE-32GB", + "wrk-107": "Tesla-V100-PCIE-32GB", + "wrk-108": "Tesla-V100-PCIE-32GB" + } diff --git a/k8s/base/kustomization.yaml b/k8s/base/kustomization.yaml index 5769c19..1fbae9b 100644 --- a/k8s/base/kustomization.yaml +++ b/k8s/base/kustomization.yaml @@ -2,3 +2,4 @@ resources: - daily-openshift-metrics-collector-cronjob.yaml - produce-report-cronjob.yaml - metrics-downloader-configmap.yaml + - gpu-node-map-configmap.yaml diff --git a/k8s/base/produce-report-cronjob.yaml b/k8s/base/produce-report-cronjob.yaml index b5c3319..d67e1f5 100644 --- a/k8s/base/produce-report-cronjob.yaml +++ b/k8s/base/produce-report-cronjob.yaml @@ -35,7 +35,10 @@ spec: volumeMounts: - name: data-volume mountPath: /data - command: ["/bin/sh", "-c", "cd /data && python /app/openshift_metrics/merge.py /data/*.json --upload-to-s3"] + - name: gpu-node-map + mountPath: /app/gpu_node_map.json + subPath: gpu_node_map.json + command: ["./produce_report.sh"] initContainers: - name: download-metrics image: amazon/aws-cli @@ -63,4 +66,7 @@ spec: configMap: name: metrics-downloader defaultMode: 0555 + - name: gpu-node-map + configMap: + name: gpu-node-map restartPolicy: OnFailure