Skip to content

[WIP] support more actions for volcano job failure scenario #4767

[WIP] support more actions for volcano job failure scenario

[WIP] support more actions for volcano job failure scenario #4767

Workflow file for this run

name: E2E Spark Integration Test
on:
# Post check in below branches
push:
branches:
- master
# PR triggered job
pull_request:
jobs:
k8s-integration-tests:
name: "E2E about Spark Integration test"
runs-on: ubuntu-20.04
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: false
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Checkout current Volcano repository
if: github.event.inputs.volcano-branch==''
uses: actions/checkout@v3
- name: Checkout Spark repository
uses: actions/checkout@v3
with:
fetch-depth: 0
repository: apache/spark
ref: branch-3.4
path: ${{ github.workspace }}/spark
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: k8s-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
k8s-integration-coursier-
- name: Install Java 8
uses: actions/setup-java@v3
with:
distribution: temurin
java-version: 8
- name: Install Go
uses: actions/setup-go@v4
with:
go-version: 1.22.x
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v2
- name: start minikube
run: |
# Use pre-install minikube
minikube start --cpus max --memory max
- name: Build lastest volcano images
run: |
eval $(minikube docker-env)
make TAG=latest update-development-yaml
sed -i 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' installer/volcano-development.yaml
make TAG=latest images
docker images | grep volcano
cat ./installer/volcano-development.yaml | grep image:
minikube kubectl -- replace --force -f ./installer/volcano-development.yaml
- name: Show all K8S pods and nodes
run: |
# Use minikube kubectl
minikube kubectl -- get pods -A
minikube kubectl -- get nodes -oyaml
- name: Run K8S integration test
run: |
eval $(minikube docker-env)
minikube kubectl -- create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
build/sbt -Pvolcano -Pkubernetes -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.include.tags=volcano "kubernetes-integration-tests/test"
working-directory: ${{ github.workspace }}/spark
- name: Collect Volcano logs
if: failure()
run: |
kubectl logs $(kubectl get po -nvolcano-system | grep volcano-scheduler |cut -d" " -f1) -nvolcano-system > volcano-scheduler.log
kubectl logs $(kubectl get po -nvolcano-system | grep volcano-admission |cut -d" " -f1 | grep -v init) -nvolcano-system > volcano-admission.log
kubectl logs $(kubectl get po -nvolcano-system | grep volcano-controllers |cut -d" " -f1) -nvolcano-system > volcano-controllers.log
kubectl get pod -A
kubectl describe node
- name: Upload Spark on K8S integration tests log files
if: failure()
uses: actions/upload-artifact@v4
with:
name: spark-on-kubernetes-it-log
path: |
**/target/integration-tests.log
**/volcano-*.log
- name: Cleanup minikube
if: always()
run: |
minikube delete