Skip to content

Commit

Permalink
[Addon #579] Refactor the spark-workload parameter definition and add…
Browse files Browse the repository at this point in the history
… spark-py example

Signed-off-by: yanghua <yanghua1127@gmail.com>
  • Loading branch information
yanghua committed Mar 2, 2023
1 parent 8916021 commit 4b52ee4
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 2 deletions.
49 changes: 49 additions & 0 deletions examples/spark-kubernetes-operator/sparkapp-py.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Example KubeVela Application that submits a PySpark job (the bundled pi.py
# example) through the spark-workload component type.
apiVersion: core.oam.dev/v1beta1
kind: Application
metadata:
  name: spark-app-v1
  namespace: spark-cluster
spec:
  components:
    - name: spark-workload-component
      type: spark-workload
      properties:
        name: my-spark-py-app
        namespace: spark-cluster
        type: Python
        # Quoted so the value stays a string instead of being read as an int.
        pythonVersion: "3"
        mode: cluster
        image: "gcr.io/spark-operator/spark-py:v3.1.1"
        imagePullPolicy: Always
        # NOTE(review): this is a JVM class name, while the application type is
        # Python; it looks carried over from the JAR-based example. Kept as-is
        # because the spark-workload definition may declare mainClass as a
        # required parameter - confirm before removing.
        mainClass: org.apache.spark.examples.streaming.JavaQueueStream
        mainApplicationFile: "local:///opt/spark/examples/src/main/python/pi.py"
        sparkVersion: "3.1.1"
        # Retry failed runs 3 times (10 s apart) and failed submissions
        # 5 times (20 s apart).
        restartPolicy:
          type: OnFailure
          onFailureRetries: 3
          onFailureRetryInterval: 10
          onSubmissionFailureRetries: 5
          onSubmissionFailureRetryInterval: 20
        # Host directory referenced by name from the driver and executor
        # volumeMounts below.
        volumes:
          - name: "test-volume"
            hostPath:
              path: "/tmp"
              type: Directory
        driver:
          cores: 1
          coreLimit: "1200m"
          memory: "1024m"
          labels:
            # Label values must be strings; quoted to rule out numeric typing.
            version: "3.1.1"
          volumeMounts:
            - name: "test-volume"
              mountPath: "/tmp"
        executor:
          cores: 1
          instances: 1
          memory: "1024m"
          labels:
            version: "3.1.1"
          volumeMounts:
            - name: "test-volume"
              mountPath: "/tmp"
10 changes: 10 additions & 0 deletions examples/spark-kubernetes-operator/sparkapp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,28 @@ spec:
mainClass: org.apache.spark.examples.streaming.JavaQueueStream
mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.12-3.1.1.jar"
sparkVersion: "3.1.1"
restartPolicy:
type: Never
volumes:
- name: "test-volume"
hostPath:
path: "/tmp"
type: Directory
driver:
cores: 1
coreLimit: "1200m"
memory: "1024m"
labels:
version: 3.1.1
volumeMounts:
- name: "test-volume"
mountPath: "/tmp"
executor:
cores: 1
instances: 1
memory: "1024m"
labels:
version: 3.1.1
volumeMounts:
- name: "test-volume"
mountPath: "/tmp"
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,47 @@ template: {
mainApplicationFile: string
// +usage=Specify the version of Spark the application uses
sparkVersion: string
// +usage=Specify the policy on if and in which conditions the controller should restart an application
restartPolicy?: {
// +usage=Type value option: "Always", "Never", "OnFailure"
type: string
// +usage=Specify the number of times to retry submitting an application before giving up. This is best effort and actual retry attempts can be >= the value specified due to caching. These are required if RestartPolicy is OnFailure
onSubmissionFailureRetries?: int
// +usage=Specify the number of times to retry running an application before giving up
onFailureRetries?: int
// +usage=Specify the interval in seconds between retries on failed submissions
onSubmissionFailureRetryInterval?: int
// +usage=Specify the interval in seconds between retries on failed runs
onFailureRetryInterval?: int
}
// +usage=Specify the driver spec request for the driver pod
driver: {
cores: int
// +usage=Specify the number of CPU cores, which maps to spark.driver.cores or spark.executor.cores for the driver and executors, respectively
cores?: int
// +usage=Specify a hard limit on CPU cores for the pod
coreLimit?: string
// +usage=Specify the amount of memory to request for the pod
memory?: string
// +usage=Specify the Kubernetes labels to be added to the pod
labels?: [string]: string
// +usage=Specify the volumes listed in “.spec.volumes” to mount into the main container’s filesystem
volumeMounts?: [...{
name: string
mountPath: string
}]
}
// +usage=Specify the executor spec request for the executor pod
executor: {
cores: int
// +usage=Specify the number of CPU cores, which maps to spark.driver.cores or spark.executor.cores for the driver and executors, respectively
cores?: int
// +usage=Specify a hard limit on CPU cores for the pod
coreLimit?: string
// +usage=Specify the amount of memory to request for the pod
memory?: string
instances?: int
// +usage=Specify the Kubernetes labels to be added to the pod
labels?: [string]: string
// +usage=Specify the volumes listed in “.spec.volumes” to mount into the main container’s filesystem
volumeMounts?: [...{
name: string
mountPath: string
Expand All @@ -62,6 +92,21 @@ template: {
type: *"Directory" | string
}
}]
// +usage=Specify the dependencies of the Spark application, covering all possible dependency types
deps?: {
// +usage=Specify a list of JAR files the Spark application depends on
jars?: [...string]
// +usage=Specify a list of files the Spark application depends on
files?: [...string]
// +usage=Specify a list of Python files the Spark application depends on
pyFiles?: [...string]
// +usage=Specify a list of maven coordinates of jars to include on the driver and executor classpaths. This will search the local maven repo, then maven central and any additional remote repositories given by the “repositories” option. Each package should be of the form “groupId:artifactId:version”
packages?: [...string]
// +usage=Specify a list of “groupId:artifactId”, to exclude while resolving the dependencies provided in Packages to avoid dependency conflicts
excludePackages?: [...string]
// +usage=Specify a list of additional remote repositories to search for the maven coordinate given with the “packages” option
repositories?: [...string]
}
}

output: {
Expand Down

0 comments on commit 4b52ee4

Please sign in to comment.