Skip to content

Mention support for Spark 4.0 in docs #20830

Mention support for Spark 4.0 in docs

Mention support for Spark 4.0 in docs #20830

Workflow file for this run

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: CI
on:
push:
branches:
- master
- branch-*
pull_request:
branches:
- master
- branch-*
concurrency:
group: test-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
MVN_OPT: -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Pjdbc-shaded,gen-policy -Dmaven.plugin.download.cache.path=/tmp/engine-archives
KUBERNETES_VERSION: v1.26.1
MINIKUBE_VERSION: v1.29.0
jobs:
default:
name: Kyuubi and Spark Test
runs-on: ubuntu-22.04
strategy:
fail-fast: false
max-parallel: 6
matrix:
java:
- 8
- 17
spark:
- '3.2'
- '3.3'
- '3.4'
- '3.5'
spark-archive: [""]
exclude-tags: [""]
comment: ["normal"]
include:
- java: 8
spark: '3.5'
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.2.4 -Dspark.archive.name=spark-3.2.4-bin-hadoop3.2.tgz -Pzookeeper-3.6'
exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
comment: 'verify-on-spark-3.2-binary'
- java: 8
spark: '3.5'
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.3.3 -Dspark.archive.name=spark-3.3.3-bin-hadoop3.tgz -Pzookeeper-3.6'
exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
comment: 'verify-on-spark-3.3-binary'
- java: 8
spark: '3.5'
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.4.3 -Dspark.archive.name=spark-3.4.3-bin-hadoop3.tgz -Pzookeeper-3.6'
exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PaimonTest,org.apache.kyuubi.tags.SparkLocalClusterTest'
comment: 'verify-on-spark-3.4-binary'
exclude:
# SPARK-33772: Spark supports JDK 17 since 3.3.0
- java: 17
spark: '3.2'
env:
SPARK_LOCAL_IP: localhost
steps:
- uses: actions/checkout@v4
- name: Free up disk space
run: ./.github/scripts/free_disk_space.sh
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Setup JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: ${{ matrix.java }}
cache: 'maven'
check-latest: false
- name: Cache Engine Archives
uses: ./.github/actions/cache-engine-archives
- name: Setup Maven
uses: ./.github/actions/setup-maven
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.9'
- name: Build and test Kyuubi and Spark with maven w/o linters
run: |
if [[ "${{ matrix.java }}" == "8" && "${{ matrix.spark }}" == "3.5" && "${{ matrix.spark-archive }}" == "" ]]; then
MVN_OPT="${MVN_OPT} -Pcodecov"
fi
TEST_MODULES="dev/kyuubi-codecov"
./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \
-Pjava-${{ matrix.java }} -Pspark-${{ matrix.spark }} -Pspark-authz-hudi-test ${{ matrix.spark-archive }} ${{ matrix.exclude-tags }}
- name: Code coverage
if: |
matrix.java == 8 &&
matrix.spark == '3.5' &&
matrix.spark-archive == ''
uses: codecov/codecov-action@v3
with:
verbose: true
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-java-${{ matrix.java }}-spark-${{ matrix.spark }}-${{ matrix.comment }}
path: |
**/target/unit-tests.log
**/kyuubi-spark-sql-engine.log*
**/kyuubi-spark-batch-submit.log*
scala-test:
name: Scala Test
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
scala:
- '2.13'
java:
- '8'
spark:
- '3.5'
steps:
- uses: actions/checkout@v4
- name: Free up disk space
run: ./.github/scripts/free_disk_space.sh
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Setup JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: ${{ matrix.java }}
cache: 'maven'
check-latest: false
- name: Setup Maven
uses: ./.github/actions/setup-maven
- name: Cache Engine Archives
uses: ./.github/actions/cache-engine-archives
- name: Build on Scala ${{ matrix.scala }}
run: |
TEST_MODULES="!externals/kyuubi-flink-sql-engine,!integration-tests/kyuubi-flink-it"
./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \
-Pscala-${{ matrix.scala }} -Pjava-${{ matrix.java }} -Pspark-${{ matrix.spark }}
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-scala-${{ matrix.scala }}-java-${{ matrix.java }}-spark-${{ matrix.spark }}
path: |
**/target/unit-tests.log
**/kyuubi-spark-sql-engine.log*
**/kyuubi-spark-batch-submit.log*
**/kyuubi-jdbc-engine.log*
**/kyuubi-hive-sql-engine.log*
flink-it:
name: Flink Test
runs-on: ubuntu-22.04
strategy:
fail-fast: false
max-parallel: 2
matrix:
java:
- 8
flink:
- '1.17'
- '1.18'
- '1.19'
flink-archive: [ "" ]
comment: [ "normal" ]
include:
- java: 8
flink: '1.17'
flink-archive: '-Dflink.archive.mirror=https://archive.apache.org/dist/flink/flink-1.18.1 -Dflink.archive.name=flink-1.18.1-bin-scala_2.12.tgz'
comment: 'verify-on-flink-1.18-binary'
- java: 8
flink: '1.17'
flink-archive: '-Dflink.archive.mirror=https://archive.apache.org/dist/flink/flink-1.19.0 -Dflink.archive.name=flink-1.19.0-bin-scala_2.12.tgz'
comment: 'verify-on-flink-1.19-binary'
steps:
- uses: actions/checkout@v4
- name: Free up disk space
run: ./.github/scripts/free_disk_space.sh
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Setup JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: ${{ matrix.java }}
cache: 'maven'
check-latest: false
- name: Setup Maven
uses: ./.github/actions/setup-maven
- name: Cache Engine Archives
uses: ./.github/actions/cache-engine-archives
- name: Build Flink with maven w/o linters
run: |
TEST_MODULES="externals/kyuubi-flink-sql-engine,integration-tests/kyuubi-flink-it"
./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -Pflink-${{ matrix.flink }} ${{ matrix.flink-archive }} -am clean install -DskipTests
- name: Test Flink
if: matrix.flink-archive == ''
run: |
TEST_MODULES="externals/kyuubi-flink-sql-engine,integration-tests/kyuubi-flink-it"
./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -Pflink-${{ matrix.flink }} ${{ matrix.flink-archive }} test
- name: Cross-version test Flink
if: matrix.flink-archive != ''
run: |
IT_FLINK=`echo "${{ matrix.flink-archive }}" | grep -E 'flink\-([0-9]+\.[0-9]+.[0-9]+)\-bin' -o | grep -E '[0-9]+\.[0-9]+' -o`
IT_MODULE="integration-tests/kyuubi-flink-it"
./build/mvn ${MVN_OPT} -pl ${IT_MODULE} -Pflink-${IT_FLINK} ${{ matrix.flink-archive }} test
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-java-${{ matrix.java }}-flink-${{ matrix.flink }}-${{ matrix.comment }}
path: |
**/target/unit-tests.log
**/kyuubi-flink-sql-engine.log*
hive-it:
name: Hive Test
runs-on: ubuntu-22.04
strategy:
fail-fast: false
max-parallel: 1
matrix:
java:
- 8
hive-archive: [ "" ]
comment: [ "normal" ]
include:
- java: 8
hive-archive: '-Dhive.archive.mirror=https://archive.apache.org/dist/hive/hive-2.3.10 -Dhive.archive.name=apache-hive-2.3.10-bin.tar.gz'
comment: 'verify-on-hive-2.3-binary'
- java: 8
hive-archive: '-Dhive.archive.mirror=https://github.com/pan3793/cdh-hive/releases/download/cdh6.3.2-release -Dhive.archive.name=apache-hive-2.1.1-cdh6.3.2-bin.tar.gz'
comment: 'verify-on-hive-2.1-cdh6-binary'
steps:
- uses: actions/checkout@v4
- name: Free up disk space
run: ./.github/scripts/free_disk_space.sh
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Setup JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: ${{ matrix.java }}
cache: 'maven'
check-latest: false
- name: Setup Maven
uses: ./.github/actions/setup-maven
- name: Cache Engine Archives
uses: ./.github/actions/cache-engine-archives
- name: Build and test Hive with maven w/o linters
run: |
TEST_MODULES="externals/kyuubi-hive-sql-engine,integration-tests/kyuubi-hive-it"
./build/mvn ${MVN_OPT} ${{ matrix.hive-archive }} -pl ${TEST_MODULES} -am clean install -DskipTests
./build/mvn ${MVN_OPT} ${{ matrix.hive-archive }} -pl ${TEST_MODULES} test
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-java-${{ matrix.java }}-hive-${{ matrix.comment }}
path: |
**/target/unit-tests.log
**/kyuubi-hive-sql-engine.log*
jdbc-trino-tpc-it:
name: JDBC Trino TPC Tests
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
java: [ 8 ]
comment: [ "normal" ]
steps:
- uses: actions/checkout@v4
- name: Free up disk space
run: ./.github/scripts/free_disk_space.sh
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Setup JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: ${{ matrix.java }}
cache: 'maven'
check-latest: false
- name: Setup Maven
uses: ./.github/actions/setup-maven
- name: Cache Engine Archives
uses: ./.github/actions/cache-engine-archives
- name: Build and test JDBC and Trino engines, TPC-H/TPC-DS with maven w/o linters
run: |
JDBC_TEST_MODULES="externals/kyuubi-jdbc-engine,integration-tests/kyuubi-jdbc-it"
TRINO_TEST_MODULES="externals/kyuubi-trino-engine,integration-tests/kyuubi-trino-it"
TPC_TEST_MODULES="kyuubi-server,extensions/spark/kyuubi-spark-connector-tpcds,extensions/spark/kyuubi-spark-connector-tpch"
./build/mvn ${MVN_OPT} -pl ${JDBC_TEST_MODULES},${TRINO_TEST_MODULES},${TPC_TEST_MODULES} -am clean install -DskipTests
./build/mvn ${MVN_OPT} -pl ${JDBC_TEST_MODULES} test
./build/mvn ${MVN_OPT} -pl ${TRINO_TEST_MODULES} test
./build/mvn ${MVN_OPT} -pl ${TPC_TEST_MODULES} test \
-Dmaven.plugin.scalatest.exclude.tags='' \
-Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds,org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite,org.apache.kyuubi.spark.connector.tpch.TPCHQuerySuite
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-java-${{ matrix.java }}-hive-${{ matrix.comment }}
path: |
**/target/unit-tests.log
**/kyuubi-jdbc-engine.log*
**/kyuubi-trino-engine.log*
kyuubi-on-k8s-it:
name: Kyuubi Server On Kubernetes Integration Test
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Free up disk space
run: ./.github/scripts/free_disk_space.sh
# https://github.com/docker/build-push-action
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build Kyuubi Docker Image
uses: docker/build-push-action@v5
with:
# passthrough CI into build container
build-args: |
CI=${CI}
MVN_ARG=--flink-provided --hive-provided -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -DskipTests
context: .
file: build/Dockerfile.CI
load: true
tags: apache/kyuubi:latest
- name: Cache Engine Archives
uses: ./.github/actions/cache-engine-archives
- name: Setup Minikube
run: |
# https://minikube.sigs.k8s.io/docs/start/
curl -LO https://github.com/kubernetes/minikube/releases/download/${MINIKUBE_VERSION}/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube
minikube start --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --force
# https://minikube.sigs.k8s.io/docs/handbook/pushing/#7-loading-directly-to-in-cluster-container-runtime
minikube image load apache/kyuubi:latest
# pre-install spark into minikube
docker pull apache/spark:3.5.1
minikube image load apache/spark:3.5.1
- name: kubectl pre-check
run: |
kubectl get nodes
kubectl get serviceaccount
kubectl create serviceaccount kyuubi
kubectl create clusterrolebinding kyuubi-role --clusterrole=edit --serviceaccount=default:kyuubi
kubectl get serviceaccount
- name: start kyuubi
run: kubectl apply -f integration-tests/kyuubi-kubernetes-it/src/test/resources/kyuubi-server.yaml
- name: kyuubi pod check
run: |
kubectl get pods
kubectl describe pods kyuubi-test
- name: integration tests
run: >-
./build/mvn ${MVN_OPT} clean install
-pl integration-tests/kyuubi-kubernetes-it -am
-Pkubernetes-it
-Dtest=none -DwildcardSuites=org.apache.kyuubi.kubernetes.test.deployment,org.apache.kyuubi.kubernetes.test.KubernetesUtilsTest
- name: Cat kyuubi server log
if: failure()
run: kubectl logs kyuubi-test
- name: Copy spark engine log from kyuubi pod
if: failure()
run: kubectl cp kyuubi-test:/opt/kyuubi/work ./target/work
- name: Cat spark driver log
if: failure()
run: |
kubectl get pods
kubectl get pods | grep driver | awk -F " " '{print$1}' | xargs -I {} kubectl logs {}
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-kyuubi-on-k8s-it
path: |
**/target/unit-tests.log
**/target/work/**
spark-on-k8s-it:
name: Spark Engine On Kubernetes Integration Test
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Free up disk space
run: ./.github/scripts/free_disk_space.sh
- name: Cache Engine Archives
uses: ./.github/actions/cache-engine-archives
- name: Setup Minikube
run: |
# https://minikube.sigs.k8s.io/docs/start/
curl -LO https://github.com/kubernetes/minikube/releases/download/${MINIKUBE_VERSION}/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube
minikube start --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --force
# in case: https://spark.apache.org/docs/latest/running-on-kubernetes.html#rbac
- name: Create Service Account
run: |
kubectl create serviceaccount spark
kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=default:spark --namespace=default
kubectl get serviceaccount
# TODO Support more Spark version
- name: integration tests
run: >-
./build/mvn ${MVN_OPT} clean install
-Pflink-provided,hive-provided
-Pkubernetes-it
-Dtest=none -DwildcardSuites=org.apache.kyuubi.kubernetes.test.spark
- name: Print Driver Pod logs
if: failure()
run: |
kubectl get pods
kubectl get pods | grep driver | awk -F " " '{print$1}' | xargs -I {} kubectl logs {}
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-spark-on-k8s-it
path: |
**/target/unit-tests.log
**/kyuubi-spark-sql-engine.log*
**/kyuubi-spark-batch-submit.log*
zookeeper-it:
name: Zookeeper Integration Test
runs-on: ubuntu-22.04
strategy:
fail-fast: false
max-parallel: 1
matrix:
java: [ 8 ]
zookeeper: ["3.4", "3.5", "3.6", "3.7" ]
comment: [ "normal" ]
steps:
- uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Setup JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: ${{ matrix.java }}
cache: 'maven'
check-latest: false
- name: Setup Maven
uses: ./.github/actions/setup-maven
- name: Cache Engine Archives
uses: ./.github/actions/cache-engine-archives
- name: zookeeper integration tests
run: |
export KYUUBI_IT_ZOOKEEPER_VERSION=${{ matrix.zookeeper }}
TEST_MODULES="integration-tests/kyuubi-zookeeper-it"
./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am clean install -DskipTests
./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} test
- name: Upload test logs
if: failure()
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-java-${{ matrix.java }}-zookeeper-${{ matrix.comment }}
path: |
**/target/unit-tests.log