From 9bfcabe67fbae384dc46e0eb50ed2052d9445408 Mon Sep 17 00:00:00 2001
From: "Wilkins, Emily (Counterpointe Solutions)"
<80470879+ewilkins-csi@users.noreply.github.com>
Date: Thu, 1 Aug 2024 09:03:38 -0500
Subject: [PATCH] #251 remove custom spark operator compilation
- remove the custom compilation of Spark Operator
- update the Spark Operator chart dependency to the latest version
- modify the Spark Operator chart to work with the stock kubeflow image
- remove custom RBAC
- change ivy cache mount location
Note that the only thing preventing the complete removal of our custom
Spark Operator image is the fact that kubeflow's Spark Operator is on
Spark 3.5 while we're on Spark 3.4. The upgrade is actually pretty easy
without many breaking changes at all; however, it started to require some
dependency untangling with different versions being pulled in between
Spark 3.5 and Quarkus 2.8. Since we know we'll be upgrading Quarkus
soon, I'm leaving the Spark 3.5 upgrade to follow that. This should cut
a significant amount of time off of the image build though, as the
custom Go compilation was taking at least half of the build time.
---
build-parent/pom.xml | 1 -
.../aissemble-enforcer-extension/pom.xml | 1 -
.../src/main/resources/docker/Dockerfile | 34 +--
.../aissemble-spark-operator-chart/Chart.yaml | 2 +-
.../aissemble-spark-operator-chart/README.md | 1 -
.../templates/rbac.yaml | 130 -----------
.../tests/ivy_cache_test.yaml | 2 +-
.../tests/rbac_test.yaml | 206 ------------------
.../values.template.yaml | 13 +-
pom.xml | 1 +
10 files changed, 12 insertions(+), 379 deletions(-)
delete mode 100644 extensions/extensions-helm/aissemble-spark-operator-chart/templates/rbac.yaml
delete mode 100644 extensions/extensions-helm/aissemble-spark-operator-chart/tests/rbac_test.yaml
diff --git a/build-parent/pom.xml b/build-parent/pom.xml
index 1320248a3..2d8393a34 100644
--- a/build-parent/pom.xml
+++ b/build-parent/pom.xml
@@ -31,7 +31,6 @@
3.6.0
3.2.0
3.1.0
- 5.7.5
${version.maven.surefire.plugin}
2.10.3
2.8.0
diff --git a/build-support/aissemble-enforcer-extension/pom.xml b/build-support/aissemble-enforcer-extension/pom.xml
index aed277485..4cb566b4f 100644
--- a/build-support/aissemble-enforcer-extension/pom.xml
+++ b/build-support/aissemble-enforcer-extension/pom.xml
@@ -15,7 +15,6 @@
3.8.6
4.13.2
6.10.4
- 5.7.5
11
11
diff --git a/extensions/extensions-docker/aissemble-spark-operator/src/main/resources/docker/Dockerfile b/extensions/extensions-docker/aissemble-spark-operator/src/main/resources/docker/Dockerfile
index c3e957df5..e4a2d88a6 100644
--- a/extensions/extensions-docker/aissemble-spark-operator/src/main/resources/docker/Dockerfile
+++ b/extensions/extensions-docker/aissemble-spark-operator/src/main/resources/docker/Dockerfile
@@ -16,40 +16,20 @@
ARG DOCKER_BASELINE_REPO_ID
ARG VERSION_AISSEMBLE
-FROM golang:1.22.2-alpine as builder
-
-WORKDIR /workspace
-
-# Copy the Go Modules manifests
-COPY ./target/checkout/go* ./
-# Cache deps before building and copying source so that we don't need to re-download as much
-# and so that source changes don't invalidate our downloaded layer
-RUN go mod download
-
-# Copy the go source code
-COPY ./target/checkout/main.go main.go
-COPY ./target/checkout/pkg/ pkg/
-COPY ./target/checkout/hack/gencerts.sh /tmp/scripts/gencerts.sh
-COPY ./target/checkout/entrypoint.sh /tmp/scripts/entrypoint.sh
-
-RUN go mod tidy
-# Build
-RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o /usr/bin/spark-operator main.go
+FROM kubeflow/spark-operator:v1beta2-1.6.2-3.5.0 AS builder
+# Only the operator binary and entrypoint script are taken from the kubeflow image; it cannot be used directly because it is on Spark 3.5 instead of 3.4
FROM ${DOCKER_BASELINE_REPO_ID}boozallen/aissemble-spark:${VERSION_AISSEMBLE}
LABEL org.opencontainers.image.source="https://github.com/boozallen/aissemble"
USER root
-COPY --from=builder /usr/bin/spark-operator /usr/bin/
-COPY --from=builder /tmp/scripts/* /usr/bin/
-RUN apt-get update --allow-releaseinfo-change \
- && apt-get update \
- && apt-get install -y openssl curl tini \
- && rm -rf /var/lib/apt/lists/* \
- && chmod +x /usr/bin/entrypoint.sh \
- && chmod +x /usr/bin/gencerts.sh
+COPY --from=builder --chmod=755 /usr/bin/spark-operator /usr/bin/
+COPY --from=builder --chmod=755 /usr/bin/entrypoint.sh /usr/bin/
+RUN apt-get update \
+ && apt-get install -y tini \
+ && rm -rf /var/lib/apt/lists/*
USER spark
ENTRYPOINT ["/usr/bin/entrypoint.sh"]
diff --git a/extensions/extensions-helm/aissemble-spark-operator-chart/Chart.yaml b/extensions/extensions-helm/aissemble-spark-operator-chart/Chart.yaml
index a043bc070..1d28c71f7 100644
--- a/extensions/extensions-helm/aissemble-spark-operator-chart/Chart.yaml
+++ b/extensions/extensions-helm/aissemble-spark-operator-chart/Chart.yaml
@@ -10,7 +10,7 @@ sources:
- https://github.com/boozallen/aissemble
dependencies:
- name: spark-operator
- version: 1.1.27
+ version: 1.4.6
repository: https://kubeflow.github.io/spark-operator/
import-values:
- child: batchScheduler
diff --git a/extensions/extensions-helm/aissemble-spark-operator-chart/README.md b/extensions/extensions-helm/aissemble-spark-operator-chart/README.md
index 6512b407c..800b7fbd1 100644
--- a/extensions/extensions-helm/aissemble-spark-operator-chart/README.md
+++ b/extensions/extensions-helm/aissemble-spark-operator-chart/README.md
@@ -45,7 +45,6 @@ aissemble-spark-operator-chart:
| volumes | Volumes for the pod | No | `spark-logging=/tmp/spark-logging` |
| volumeMounts | Volume Mounts for the pod | No | `spark-logging=/tmp/spark-logging` |
| fullnameOverride | String to override release name | No | spark-operator |
-| rbac.createClusterRole | See `Migrated Properties` | No | false |
| serviceAccounts.spark.name | Name for the spark service account | No | spark |
diff --git a/extensions/extensions-helm/aissemble-spark-operator-chart/templates/rbac.yaml b/extensions/extensions-helm/aissemble-spark-operator-chart/templates/rbac.yaml
deleted file mode 100644
index 8a188bf25..000000000
--- a/extensions/extensions-helm/aissemble-spark-operator-chart/templates/rbac.yaml
+++ /dev/null
@@ -1,130 +0,0 @@
-{{- if or .Values.rbac.create .Values.rbac.createClusterRole }}
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: {{ include "spark-operator.fullname" (index .Subcharts "spark-operator") }}
- annotations:
- "helm.sh/hook": pre-install, pre-upgrade
- "helm.sh/hook-delete-policy": hook-failed, before-hook-creation
- "helm.sh/hook-weight": "-10"
- labels:
- {{- include "spark-operator.labels" (index .Subcharts "spark-operator") | nindent 4 }}
-rules:
-- apiGroups:
- - ""
- resources:
- - pods
- verbs:
- - "*"
-- apiGroups:
- - ""
- resources:
- - services
- - configmaps
- - secrets
- verbs:
- - create
- - get
- - delete
- - update
- - list
-- apiGroups:
- - extensions
- - networking.k8s.io
- resources:
- - ingresses
- verbs:
- - create
- - get
- - delete
-- apiGroups:
- - ""
- resources:
- - nodes
- verbs:
- - get
-- apiGroups:
- - ""
- resources:
- - events
- verbs:
- - create
- - update
- - patch
-- apiGroups:
- - ""
- resources:
- - resourcequotas
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - apiextensions.k8s.io
- resources:
- - customresourcedefinitions
- verbs:
- - create
- - get
- - update
- - delete
-- apiGroups:
- - admissionregistration.k8s.io
- resources:
- - mutatingwebhookconfigurations
- - validatingwebhookconfigurations
- verbs:
- - create
- - get
- - update
- - delete
-- apiGroups:
- - sparkoperator.k8s.io
- resources:
- - sparkapplications
- - sparkapplications/status
- - scheduledsparkapplications
- - scheduledsparkapplications/status
- verbs:
- - "*"
- {{- if .Values.batchScheduler.enable }}
- # required for the `volcano` batch scheduler
-- apiGroups:
- - scheduling.incubator.k8s.io
- - scheduling.sigs.dev
- - scheduling.volcano.sh
- resources:
- - podgroups
- verbs:
- - "*"
- {{- end }}
- {{ if .Values.webhook.enable }}
-- apiGroups:
- - batch
- resources:
- - jobs
- verbs:
- - delete
- {{- end }}
-
----
-
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: {{ include "spark-operator.fullname" (index .Subcharts "spark-operator") }}
- annotations:
- "helm.sh/hook": pre-install, pre-upgrade
- "helm.sh/hook-delete-policy": hook-failed, before-hook-creation
- "helm.sh/hook-weight": "-10"
- labels:
- {{- include "spark-operator.labels" (index .Subcharts "spark-operator") | nindent 4 }}
-subjects:
- - kind: ServiceAccount
- name: {{ include "spark-operator.serviceAccountName" (index .Subcharts "spark-operator") }}
- namespace: {{ .Release.Namespace }}
-roleRef:
- kind: ClusterRole
- name: {{ include "spark-operator.fullname" (index .Subcharts "spark-operator") }}
- apiGroup: rbac.authorization.k8s.io
-{{- end }}
diff --git a/extensions/extensions-helm/aissemble-spark-operator-chart/tests/ivy_cache_test.yaml b/extensions/extensions-helm/aissemble-spark-operator-chart/tests/ivy_cache_test.yaml
index 175c541a6..9ac0c99ac 100644
--- a/extensions/extensions-helm/aissemble-spark-operator-chart/tests/ivy_cache_test.yaml
+++ b/extensions/extensions-helm/aissemble-spark-operator-chart/tests/ivy_cache_test.yaml
@@ -138,4 +138,4 @@ tests:
# path: spec.volumeMounts
# content:
# name: ivy-cache
-# mountPath: /opt/spark/.ivy2
\ No newline at end of file
+# mountPath: /home/spark/.ivy2
\ No newline at end of file
diff --git a/extensions/extensions-helm/aissemble-spark-operator-chart/tests/rbac_test.yaml b/extensions/extensions-helm/aissemble-spark-operator-chart/tests/rbac_test.yaml
deleted file mode 100644
index 5d386c24f..000000000
--- a/extensions/extensions-helm/aissemble-spark-operator-chart/tests/rbac_test.yaml
+++ /dev/null
@@ -1,206 +0,0 @@
-suite: spark-operator
-templates:
- - rbac.yaml
-tests:
- - it: Should contain ClusterRole document
- documentIndex: 0
- asserts:
- - containsDocument:
- kind: ClusterRole
- apiVersion: rbac.authorization.k8s.io/v1
- - it: Should contain ClusterRoleBinding document
- documentIndex: 1
- asserts:
- - containsDocument:
- kind: ClusterRoleBinding
- apiVersion: rbac.authorization.k8s.io/v1
- - it: Should be 2 documents in total
- asserts:
- - hasDocuments:
- count: 2
- - it: Does not produce documents if options are disabled
- set:
- rbac.create: false
- rbac.createClusterRole: false
- asserts:
- - hasDocuments:
- count: 0
- - it: ClusterRoleBinding should include appropriate default values
- documentIndex: 1
- release:
- namespace: default
- asserts:
- - equal:
- path: metadata.name
- value: spark-operator
- - contains:
- path: subjects
- content:
- kind: ServiceAccount
- name: sparkoperator
- namespace: default
- - equal:
- path: roleRef.kind
- value: ClusterRole
- - equal:
- path: roleRef.name
- value: spark-operator
- - equal:
- path: roleRef.apiGroup
- value: rbac.authorization.k8s.io
- - it: ClusterRole conditional rules should be responsive to values setting
- documentIndex: 0
- set:
- batchScheduler.enable: true
- webhook.enable: false
- asserts:
- - notContains:
- path: rules
- content:
- apiGroups:
- - batch
- resources:
- - jobs
- verbs:
- - delete
- - contains:
- path: rules
- content:
- apiGroups:
- - scheduling.incubator.k8s.io
- - scheduling.sigs.dev
- - scheduling.volcano.sh
- resources:
- - podgroups
- verbs:
- - "*"
- - it: ClusterRole should include appropriate default values
- documentIndex: 0
- asserts:
- - equal:
- path: metadata.name
- value: spark-operator
- - contains:
- path: rules
- content:
- apiGroups:
- - ""
- resources:
- - pods
- verbs:
- - "*"
- - contains:
- path: rules
- content:
- apiGroups:
- - ""
- resources:
- - services
- - configmaps
- - secrets
- verbs:
- - create
- - get
- - delete
- - update
- - list
- - contains:
- path: rules
- content:
- apiGroups:
- - extensions
- - networking.k8s.io
- resources:
- - ingresses
- verbs:
- - create
- - get
- - delete
- - contains:
- path: rules
- content:
- apiGroups:
- - ""
- resources:
- - nodes
- verbs:
- - get
- - contains:
- path: rules
- content:
- apiGroups:
- - ""
- resources:
- - events
- verbs:
- - create
- - update
- - patch
- - contains:
- path: rules
- content:
- apiGroups:
- - ""
- resources:
- - resourcequotas
- verbs:
- - get
- - list
- - watch
- - contains:
- path: rules
- content:
- apiGroups:
- - apiextensions.k8s.io
- resources:
- - customresourcedefinitions
- verbs:
- - create
- - get
- - update
- - delete
- - contains:
- path: rules
- content:
- apiGroups:
- - admissionregistration.k8s.io
- resources:
- - mutatingwebhookconfigurations
- - validatingwebhookconfigurations
- verbs:
- - create
- - get
- - update
- - delete
- - contains:
- path: rules
- content:
- apiGroups:
- - sparkoperator.k8s.io
- resources:
- - sparkapplications
- - sparkapplications/status
- - scheduledsparkapplications
- - scheduledsparkapplications/status
- verbs:
- - "*"
- - notContains:
- path: rules
- content:
- apiGroups:
- - scheduling.incubator.k8s.io
- - scheduling.sigs.dev
- - scheduling.volcano.sh
- resources:
- - podgroups
- verbs:
- - "*"
- - contains:
- path: rules
- content:
- apiGroups:
- - batch
- resources:
- - jobs
- verbs:
- - delete
\ No newline at end of file
diff --git a/extensions/extensions-helm/aissemble-spark-operator-chart/values.template.yaml b/extensions/extensions-helm/aissemble-spark-operator-chart/values.template.yaml
index f4f15e037..8574cdd5c 100644
--- a/extensions/extensions-helm/aissemble-spark-operator-chart/values.template.yaml
+++ b/extensions/extensions-helm/aissemble-spark-operator-chart/values.template.yaml
@@ -30,11 +30,6 @@ spark-operator:
fsGroup: 1000
fsGroupChangePolicy: "OnRootMismatch"
- rbac:
- # -- Create and use RBAC `ClusterRole` resources
- # -- Set to false in order to enable overriding with our own RBAC template
- createClusterRole: false
-
# volumes - Operator volumes
volumes:
- name: spark-logging
@@ -48,7 +43,7 @@ spark-operator:
- name: spark-logging
mountPath: "/tmp/spark-events"
- name: ivy-cache
- mountPath: "/opt/spark/.ivy2"
+ mountPath: "/home/spark/.ivy2"
webhook:
# -- Enable webhook server
@@ -61,8 +56,4 @@ spark-operator:
sparkoperator:
# -- Optional name for the operator service account
- name: "sparkoperator"
-
-rbac:
- # -- Set to True in order to enable overriding with our own RBAC template
- createClusterRole: True
\ No newline at end of file
+ name: "sparkoperator"
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 3cf8243a2..3397b5641 100644
--- a/pom.xml
+++ b/pom.xml
@@ -67,6 +67,7 @@
27.7.18
2.6.0
3.1.4.Final
+ 5.7.5