[HUDI-7919] Migrate integration tests to run on Spark 3.5 (#11994)
yihua authored Sep 30, 2024
1 parent 7f8753a commit d504a99
Showing 22 changed files with 121 additions and 76 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/bot.yml
@@ -629,8 +629,8 @@ jobs:
strategy:
matrix:
include:
- sparkProfile: 'spark2.4'
sparkArchive: 'spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz'
- sparkProfile: 'spark3.5'
sparkArchive: 'spark-3.5.3/spark-3.5.3-bin-hadoop3.tgz'
steps:
- uses: actions/checkout@v3
- name: Set up JDK 8
@@ -642,20 +642,20 @@
- name: Build Project
env:
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SCALA_PROFILE: '-Dscala-2.11 -Dscala.binary.version=2.11'
SCALA_PROFILE: '-Dscala-2.12 -Dscala.binary.version=2.12'
run:
mvn clean install -T 2 $SCALA_PROFILE -D"$SPARK_PROFILE" -Pintegration-tests -DskipTests=true $MVN_ARGS
- name: 'UT integ-test'
env:
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SCALA_PROFILE: '-Dscala-2.11 -Dscala.binary.version=2.11'
SCALA_PROFILE: '-Dscala-2.12 -Dscala.binary.version=2.12'
run:
mvn test $SCALA_PROFILE -D"$SPARK_PROFILE" -Pintegration-tests -DskipUTs=false -DskipITs=true -pl hudi-integ-test $MVN_ARGS
- name: 'IT'
env:
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_ARCHIVE: ${{ matrix.sparkArchive }}
SCALA_PROFILE: '-Dscala-2.11 -Dscala.binary.version=2.11'
SCALA_PROFILE: '-Dscala-2.12 -Dscala.binary.version=2.12'
run: |
echo "Downloading $SPARK_ARCHIVE"
curl https://archive.apache.org/dist/spark/$SPARK_ARCHIVE --create-dirs -o $GITHUB_WORKSPACE/$SPARK_ARCHIVE
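For anyone reproducing this job outside CI: a minimal local sketch of the updated IT setup, assuming the same Apache archive mirror and that the tarball is unpacked next to the checkout (the workflow's remaining IT steps are truncated above):

    # Sketch of the Spark 3.5 integration-test environment; flags mirror the workflow above.
    SPARK_ARCHIVE=spark-3.5.3/spark-3.5.3-bin-hadoop3.tgz
    SCALA_PROFILE='-Dscala-2.12 -Dscala.binary.version=2.12'
    curl https://archive.apache.org/dist/spark/$SPARK_ARCHIVE --create-dirs -o "$PWD/$SPARK_ARCHIVE"
    tar -xzf "$PWD/$SPARK_ARCHIVE"   # extraction step assumed; not shown in the truncated diff
    mvn clean install -T 2 $SCALA_PROFILE -Dspark3.5 -Pintegration-tests -DskipTests=true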
@@ -13,16 +13,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

version: "3.3"

services:

namenode:
image: apachehudi/hudi-hadoop_2.8.4-namenode:latest
platform: linux/amd64
hostname: namenode
container_name: namenode
environment:
- CLUSTER_NAME=hudi_hadoop284_hive232_spark244
- CLUSTER_NAME=hudi_hadoop284_hive232_spark353
ports:
- "50070:50070"
- "8020:8020"
@@ -38,10 +37,11 @@ services:

datanode1:
image: apachehudi/hudi-hadoop_2.8.4-datanode:latest
platform: linux/amd64
container_name: datanode1
hostname: datanode1
environment:
- CLUSTER_NAME=hudi_hadoop284_hive232_spark244
- CLUSTER_NAME=hudi_hadoop284_hive232_spark353
env_file:
- ./hadoop.env
ports:
@@ -62,10 +62,11 @@

historyserver:
image: apachehudi/hudi-hadoop_2.8.4-history:latest
platform: linux/amd64
hostname: historyserver
container_name: historyserver
environment:
- CLUSTER_NAME=hudi_hadoop284_hive232_spark244
- CLUSTER_NAME=hudi_hadoop284_hive232_spark353
depends_on:
- "namenode"
links:
@@ -91,6 +92,7 @@ services:

hivemetastore:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest
platform: linux/amd64
hostname: hivemetastore
container_name: hivemetastore
links:
@@ -116,6 +118,7 @@

hiveserver:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest
platform: linux/amd64
hostname: hiveserver
container_name: hiveserver
env_file:
@@ -136,7 +139,8 @@ services:
- ${HUDI_WS}:/var/hoodie/ws

sparkmaster:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_2.4.4:latest
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_3.5.3:latest
platform: linux/amd64
hostname: sparkmaster
container_name: sparkmaster
env_file:
@@ -155,7 +159,8 @@ services:
- "namenode"

spark-worker-1:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_2.4.4:latest
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_3.5.3:latest
platform: linux/amd64
hostname: spark-worker-1
container_name: spark-worker-1
env_file:
@@ -197,6 +202,7 @@ services:
container_name: presto-coordinator-1
hostname: presto-coordinator-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.271:latest
platform: linux/amd64
ports:
- "8090:8090"
# JVM debugging port (will be mapped to a random port on host)
@@ -218,6 +224,7 @@
container_name: presto-worker-1
hostname: presto-worker-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.271:latest
platform: linux/amd64
depends_on: [ "presto-coordinator-1" ]
environment:
- PRESTO_JVM_MAX_HEAP=512M
@@ -239,6 +246,7 @@
container_name: trino-coordinator-1
hostname: trino-coordinator-1
image: apachehudi/hudi-hadoop_2.8.4-trinocoordinator_368:latest
platform: linux/amd64
ports:
- "8091:8091"
# JVM debugging port (will be mapped to a random port on host)
@@ -253,6 +261,7 @@
container_name: trino-worker-1
hostname: trino-worker-1
image: apachehudi/hudi-hadoop_2.8.4-trinoworker_368:latest
platform: linux/amd64
depends_on: [ "trino-coordinator-1" ]
ports:
- "8092:8092"
@@ -277,7 +286,8 @@ services:
- 8126:8126

adhoc-1:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_3.5.3:latest
platform: linux/amd64
hostname: adhoc-1
container_name: adhoc-1
env_file:
@@ -301,7 +311,8 @@ services:
- ${HUDI_WS}:/var/hoodie/ws

adhoc-2:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_3.5.3:latest
platform: linux/amd64
hostname: adhoc-2
container_name: adhoc-2
env_file:
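Note on the `platform: linux/amd64` lines added throughout this compose file: they pin every service to amd64 images, so on arm64 hosts Docker falls back to emulation. The same pinning can be exercised manually, e.g.:

    # Sketch: pull one of the retagged Spark 3.5.3 images with an explicit platform
    # (on Apple Silicon this runs under QEMU/Rosetta emulation).
    docker pull --platform linux/amd64 apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_3.5.3:latest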
1 change: 1 addition & 0 deletions docker/demo/sparksql-incremental.commands
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.FileSystem;

val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
val beginInstantTime = HoodieDataSourceHelpers.listCommitsSince(fs, "/user/hive/warehouse/stock_ticks_cow", "00000").get(0)
println("Begin instant time for incremental query: " + beginInstantTime)
val hoodieIncQueryDF = spark.read.format("org.apache.hudi").
option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
option(DataSourceReadOptions.BEGIN_INSTANTTIME.key(), beginInstantTime).
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/base/Dockerfile
@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM openjdk:8u212-jdk-slim-stretch
FROM openjdk:8u342-jdk-slim-bullseye
MAINTAINER Hoodie
USER root

2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/pom.xml
@@ -54,7 +54,7 @@
<properties>
<skipITs>false</skipITs>
<docker.build.skip>true</docker.build.skip>
<docker.spark.version>2.4.4</docker.spark.version>
<docker.spark.version>3.5.3</docker.spark.version>
<docker.hive.version>2.3.3</docker.hive.version>
<docker.hadoop.version>2.8.4</docker.hadoop.version>
<docker.presto.version>0.271</docker.presto.version>
7 changes: 4 additions & 3 deletions docker/hoodie/hadoop/spark_base/Dockerfile
@@ -15,16 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG HADOOP_VERSION=2.8.4
ARG HADOOP_VERSION=2.8.4
ARG HIVE_VERSION=2.3.3
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}

ENV ENABLE_INIT_DAEMON true
ENV INIT_DAEMON_BASE_URI http://identifier/init-daemon
ENV INIT_DAEMON_STEP spark_master_init

ARG SPARK_VERSION=2.4.4
ARG SPARK_HADOOP_VERSION=2.7
ARG SPARK_VERSION=3.5.3
ARG SPARK_HADOOP_VERSION=3

ENV SPARK_VERSION ${SPARK_VERSION}
ENV HADOOP_VERSION ${SPARK_HADOOP_VERSION}
@@ -33,6 +33,7 @@ COPY wait-for-step.sh /
COPY execute-step.sh /
COPY finish-step.sh /

# Need to do this all in one step because running separate commands doubles the image size
RUN echo "Installing Spark-version (${SPARK_VERSION})" \
&& wget http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
&& tar -xvzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
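The single-RUN comment above matters because each Dockerfile instruction creates a layer: if the Spark tarball is downloaded in one RUN and deleted in a later one, it stays baked into the earlier layer. A quick sanity check, assuming the image tag follows the `sparkbase_${SPARK_VERSION}` pattern used in the FROM lines below:

    # Sketch: inspect per-layer sizes; a split download/cleanup would show up as a large extra layer.
    docker history apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkbase_3.5.3:latest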
4 changes: 2 additions & 2 deletions docker/hoodie/hadoop/sparkadhoc/Dockerfile
@@ -15,9 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG HADOOP_VERSION=2.8.4
ARG HADOOP_VERSION=2.8.4
ARG HIVE_VERSION=2.3.3
ARG SPARK_VERSION=2.4.4
ARG SPARK_VERSION=3.5.3
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION}

ARG PRESTO_VERSION=0.268
4 changes: 2 additions & 2 deletions docker/hoodie/hadoop/sparkmaster/Dockerfile
@@ -15,9 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG HADOOP_VERSION=2.8.4
ARG HADOOP_VERSION=2.8.4
ARG HIVE_VERSION=2.3.3
ARG SPARK_VERSION=2.4.4
ARG SPARK_VERSION=3.5.3
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION}

COPY master.sh /opt/spark
4 changes: 2 additions & 2 deletions docker/hoodie/hadoop/sparkworker/Dockerfile
@@ -15,9 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG HADOOP_VERSION=2.8.4
ARG HADOOP_VERSION=2.8.4
ARG HIVE_VERSION=2.3.3
ARG SPARK_VERSION=2.4.4
ARG SPARK_VERSION=3.5.3
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION}

COPY worker.sh /opt/spark
2 changes: 1 addition & 1 deletion docker/setup_demo.sh
@@ -19,7 +19,7 @@
SCRIPT_PATH=$(cd `dirname $0`; pwd)
HUDI_DEMO_ENV=$1
WS_ROOT=`dirname $SCRIPT_PATH`
COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244.yml"
COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark353_amd64.yml"
if [ "$HUDI_DEMO_ENV" = "--mac-aarch64" ]; then
COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml"
fi
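Usage stays the same; only the default compose file changes. A sketch, assuming the script is invoked from the docker/ directory:

    # Default: Spark 3.5.3 amd64 stack
    ./setup_demo.sh
    # Apple Silicon: still falls back to the Spark 2.4.4 aarch64 compose file
    ./setup_demo.sh --mac-aarch64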
2 changes: 1 addition & 1 deletion docker/stop_demo.sh
@@ -20,7 +20,7 @@ SCRIPT_PATH=$(cd `dirname $0`; pwd)
HUDI_DEMO_ENV=$1
# set up root directory
WS_ROOT=`dirname $SCRIPT_PATH`
COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244.yml"
COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark353_amd64.yml"
if [ "$HUDI_DEMO_ENV" = "--mac-aarch64" ]; then
COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml"
fi
6 changes: 4 additions & 2 deletions hudi-aws/pom.xml
@@ -256,6 +256,7 @@
<name>amazon/dynamodb-local:${dynamodb-local.version}</name>
<alias>it-database</alias>
<run>
<platform>linux/amd64</platform>
<ports>
<port>${dynamodb-local.port}:${dynamodb-local.port}</port>
</ports>
@@ -268,11 +269,12 @@
</run>
</image>
<image>
<name>motoserver/moto:${moto.version}</name>
<name>apachehudi/moto:${moto.version}</name>
<alias>it-aws</alias>
<run>
<platform>linux/amd64</platform>
<ports>
<port>${moto.port}:${moto.port}</port>
<port>${moto.port}:5000</port>
</ports>
<wait>
<http>
@@ -60,8 +60,9 @@

@Disabled("HUDI-7475 The tests do not work. Disabling them to unblock Azure CI")
public class ITTestGluePartitionPushdown {

private static final String MOTO_ENDPOINT = "http://localhost:5000";
// This port number must be the same as {@code moto.port} defined in pom.xml
private static final int MOTO_PORT = 5002;
private static final String MOTO_ENDPOINT = "http://localhost:" + MOTO_PORT;
private static final String DB_NAME = "db_name";
private static final String TABLE_NAME = "tbl_name";
private String basePath = Files.createTempDirectory("hivesynctest" + Instant.now().toEpochMilli()).toUri().toString();
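Since the pom above now maps `${moto.port}` on the host to moto's fixed in-container port 5000, the test's MOTO_PORT constant has to track the `moto.port` property. A quick hedged check that the remapped endpoint is reachable once the container is up (5002 is the value used in the test above):

    # Sketch: confirm the host-side moto port answers; any HTTP status code proves the mapping works.
    curl -s -o /dev/null -w "%{http_code}\n" http://localhost:5002/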
8 changes: 6 additions & 2 deletions hudi-integ-test/pom.xml
@@ -389,7 +389,9 @@

<properties>
<dockerCompose.envFile>${project.basedir}/compose_env</dockerCompose.envFile>
<dockerCompose.file>${project.basedir}/../docker/compose/docker-compose_hadoop284_hive233_spark244.yml</dockerCompose.file>
<dockerCompose.file>
${project.basedir}/../docker/compose/docker-compose_hadoop284_hive233_spark353_amd64.yml
</dockerCompose.file>
<docker.compose.skip>${skipITs}</docker.compose.skip>
<main.basedir>${project.parent.basedir}</main.basedir>
</properties>
@@ -513,7 +515,9 @@
<profile>
<id>m1-mac</id>
<properties>
<dockerCompose.file>${project.basedir}/../docker/compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml</dockerCompose.file>
<dockerCompose.file>
${project.basedir}/../docker/compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml
</dockerCompose.file>
</properties>
<activation>
<os>
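With `docker.compose.skip` tied to `${skipITs}`, the dockerized suite can be driven straight from Maven against the new compose file; a sketch, assuming the standard failsafe wiring (the m1-mac profile above swaps in the aarch64 compose file automatically on Apple Silicon):

    # Sketch: run the dockerized integration tests for this module only.
    mvn verify -Pintegration-tests -DskipITs=false -pl hudi-integ-test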
2 changes: 1 addition & 1 deletion hudi-integ-test/prepare_integration_suite.sh
@@ -38,7 +38,7 @@ usage() {
get_spark_command() {
if [ -z "$scala" ]
then
scala="2.11"
scala="2.12"
else
scala=$scala
fi
@@ -170,7 +170,7 @@ private boolean checkHealth(String fromContainerName, String hostname, int port)
TestExecStartResultCallback resultCallback =
executeCommandStringInDocker(fromContainerName, command, false, true);
String stderrString = resultCallback.getStderr().toString().trim();
if (!stderrString.contains("open")) {
if (!stderrString.contains("succeeded")) {
Thread.sleep(1000);
return false;
}
@@ -368,7 +368,8 @@ void assertStdOutContains(Pair<String, String> stdOutErr, String expectedOutput,
}

if (times != count) {
saveUpLogs();
// TODO(HUDI-8268): fix the command with pipe
// saveUpLogs();
}

assertEquals(times, count, "Did not find output the expected number of times.");
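The checkHealth change above ("open" → "succeeded") likely tracks the netcat variant shipped in the new bullseye-based image: nc-openbsd reports successful probes as "Connection to ... port ... succeeded!" rather than the older "... open" wording. A hedged sketch of what the probe now matches, assuming it shells out to nc as the string match suggests (hostname and port are illustrative):

    # nc-openbsd style output; older nc builds printed '... [tcp/*] open' instead.
    nc -z -v namenode 50070 2>&1 | grep -q succeeded && echo healthy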