
Commit

update to 7.0.1
panbingkun committed Dec 18, 2024
2 parents ae45d24 + 229118c commit dbd6d12
Showing 60 changed files with 1,771 additions and 1,541 deletions.
38 changes: 26 additions & 12 deletions .github/workflows/build_and_test.yml
@@ -41,7 +41,7 @@ on:
description: Additional environment variables to set when running the tests. Should be in JSON format.
required: false
type: string
default: '{}'
default: '{"PYSPARK_IMAGE_TO_TEST": "python-311", "PYTHON_TO_TEST": "python3.11"}'
jobs:
description: >-
Jobs to run, and should be in JSON format. The values should be matched with the job's key defined
@@ -55,7 +55,6 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_PREV_SHA: ${{ github.event.before }}
PYSPARK_IMAGE_TO_TEST: ''
outputs:
required: ${{ steps.set-outputs.outputs.required }}
image_url: ${{ steps.infra-image-outputs.outputs.image_url }}
@@ -135,6 +134,28 @@ jobs:
precondition="${precondition//$'\n'/}"
echo "required=$precondition" >> $GITHUB_OUTPUT
fi
- name: Check envs
id: check-envs
if: inputs.branch != 'branch-3.5'
env: ${{ fromJSON(inputs.envs) }}
run: |
if [[ "${{ fromJson(steps.set-outputs.outputs.required).pyspark }}" == 'true' ]]; then
if [[ "${{ env.PYSPARK_IMAGE_TO_TEST }}" == "" ]]; then
echo "PYSPARK_IMAGE_TO_TEST is required when pyspark is enabled."
exit 1
fi
PYSPARK_IMAGE_PATH="dev/spark-test-image/${{ env.PYSPARK_IMAGE_TO_TEST }}/Dockerfile"
if [ -f $PYSPARK_IMAGE_PATH ]; then
echo "Dockerfile $PYSPARK_IMAGE_PATH exists."
else
echo "Dockerfile $PYSPARK_IMAGE_PATH does NOT exist."
exit 1
fi
if [[ "${{ env.PYTHON_TO_TEST }}" == "" ]]; then
echo "PYTHON_TO_TEST is required when pyspark is enabled."
exit 1
fi
fi
- name: Generate infra image URL
id: infra-image-outputs
run: |
@@ -192,11 +213,7 @@ jobs:
echo "image_docs_url_link=${{ steps.infra-image-docs-outputs.outputs.image_docs_url }}" >> $GITHUB_OUTPUT
echo "image_lint_url_link=${{ steps.infra-image-lint-outputs.outputs.image_lint_url }}" >> $GITHUB_OUTPUT
echo "image_sparkr_url_link=${{ steps.infra-image-sparkr-outputs.outputs.image_sparkr_url }}" >> $GITHUB_OUTPUT
if [[ "${{ env.PYSPARK_IMAGE_TO_TEST }}" != "" ]]; then
echo "image_pyspark_url_link=${{ steps.infra-image-pyspark-outputs.outputs.image_pyspark_url }}" >> $GITHUB_OUTPUT
else
echo "image_pyspark_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT
fi
echo "image_pyspark_url_link=${{ steps.infra-image-pyspark-outputs.outputs.image_pyspark_url }}" >> $GITHUB_OUTPUT
fi
# Build: build Spark and run the tests for specified modules.
@@ -380,8 +397,6 @@ jobs:
runs-on: ubuntu-latest
permissions:
packages: write
env:
PYSPARK_IMAGE_TO_TEST: ''
steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
@@ -450,8 +465,8 @@ jobs:
${{ needs.precondition.outputs.image_sparkr_url }}
# Use the infra image cache to speed up
cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ inputs.branch }}
- name: Build and push (PySpark ${{ env.PYSPARK_IMAGE_TO_TEST }})
if: ${{ env.PYSPARK_IMAGE_TO_TEST }}
- name: Build and push (PySpark with ${{ env.PYSPARK_IMAGE_TO_TEST }})
if: ${{ inputs.branch != 'branch-3.5' && env.PYSPARK_IMAGE_TO_TEST != '' }}
id: docker_build_pyspark
env: ${{ fromJSON(inputs.envs) }}
uses: docker/build-push-action@v6
@@ -511,7 +526,6 @@ jobs:
- modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part3' }}
env:
MODULES_TO_TEST: ${{ matrix.modules }}
PYTHON_TO_TEST: 'python3.11'
HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
# GitHub Actions' default miniconda to use in pip packaging test.
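
Note: the new "Check envs" step above can be approximated locally with a short shell sketch, run from the repository root (the image name python-311 comes from the new default envs value; any other directory under dev/spark-test-image works the same way):

    PYSPARK_IMAGE_TO_TEST=python-311
    PYSPARK_IMAGE_PATH="dev/spark-test-image/${PYSPARK_IMAGE_TO_TEST}/Dockerfile"
    if [ -f "$PYSPARK_IMAGE_PATH" ]; then
      echo "Dockerfile $PYSPARK_IMAGE_PATH exists."
    else
      echo "Dockerfile $PYSPARK_IMAGE_PATH does NOT exist."
      exit 1
    fi
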
2 changes: 2 additions & 0 deletions .github/workflows/build_branch35.yml
@@ -22,6 +22,7 @@ name: "Build (branch-3.5, Scala 2.13, Hadoop 3, JDK 8)"
on:
schedule:
- cron: '0 11 * * *'
workflow_dispatch:

jobs:
run-build:
@@ -37,6 +38,7 @@ jobs:
envs: >-
{
"SCALA_PROFILE": "scala2.13",
"PYSPARK_IMAGE_TO_TEST": "",
"PYTHON_TO_TEST": "",
"ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0"
}
2 changes: 2 additions & 0 deletions .github/workflows/build_branch35_python.yml
@@ -22,6 +22,7 @@ name: "Build / Python-only (branch-3.5)"
on:
schedule:
- cron: '0 11 * * *'
workflow_dispatch:

jobs:
run-build:
@@ -36,6 +37,7 @@ jobs:
hadoop: hadoop3
envs: >-
{
"PYSPARK_IMAGE_TO_TEST": "",
"PYTHON_TO_TEST": ""
}
jobs: >-
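
Note: with workflow_dispatch: added, both branch-3.5 nightly workflows can also be started manually instead of waiting for the cron schedule. A hypothetical invocation with the GitHub CLI (assumes gh is installed and you are allowed to dispatch workflows in the target repository):

    # Trigger the Python-only branch-3.5 build on demand
    gh workflow run build_branch35_python.yml --repo apache/spark
    # ...or the full branch-3.5 build
    gh workflow run build_branch35.yml --repo apache/spark
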
2 changes: 1 addition & 1 deletion .github/workflows/pages.yml
@@ -63,7 +63,7 @@ jobs:
pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.2.3' 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
- name: Install Ruby for documentation generation
uses: ruby/setup-ruby@v1
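
Note: a quick local sanity check that the new grpcio/grpcio-status/protobuf pins above resolve together, sketched for any Python 3.9+ environment:

    python3 -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
    python3 -c "import grpc, google.protobuf; print(grpc.__version__, google.protobuf.__version__)"
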
KafkaMicroBatchSourceSuite.scala
@@ -1591,22 +1591,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase with
}
}


class KafkaMicroBatchV1SourceWithAdminSuite extends KafkaMicroBatchV1SourceSuite {
override def beforeAll(): Unit = {
super.beforeAll()
spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false")
}
}

class KafkaMicroBatchV2SourceWithAdminSuite extends KafkaMicroBatchV2SourceSuite {
override def beforeAll(): Unit = {
super.beforeAll()
spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false")
}
}

class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase {
abstract class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase {
override def beforeAll(): Unit = {
super.beforeAll()
spark.conf.set(
@@ -1637,7 +1622,7 @@ class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase {
}
}

class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase {
abstract class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase {

test("V2 Source is used by default") {
val topic = newTopic()
@@ -1870,6 +1855,35 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase {
}
}

class KafkaMicroBatchV1SourceWithAdminSuite extends KafkaMicroBatchV1SourceSuite {
override def beforeAll(): Unit = {
super.beforeAll()
spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false")
}
}

class KafkaMicroBatchV1SourceWithConsumerSuite extends KafkaMicroBatchV1SourceSuite {
override def beforeAll(): Unit = {
super.beforeAll()
spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "true")
}
}

class KafkaMicroBatchV2SourceWithAdminSuite extends KafkaMicroBatchV2SourceSuite {
override def beforeAll(): Unit = {
super.beforeAll()
spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false")
}
}

class KafkaMicroBatchV2SourceWithConsumerSuite extends KafkaMicroBatchV2SourceSuite {
override def beforeAll(): Unit = {
super.beforeAll()
spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "true")
}
}


abstract class KafkaSourceSuiteBase extends KafkaSourceTest {

import testImplicits._
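
Note: the refactoring above turns the V1/V2 micro-batch suites into abstract bases and adds concrete WithAdminSuite/WithConsumerSuite variants, so both settings of SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING are exercised. One of the new suites can be run in isolation roughly like this (assuming the sbt project id sql-kafka-0-10, which may differ in your checkout):

    ./build/sbt "sql-kafka-0-10/testOnly org.apache.spark.sql.kafka010.KafkaMicroBatchV2SourceWithConsumerSuite"
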
62 changes: 31 additions & 31 deletions dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -158,41 +158,41 @@ jsr305/3.0.0//jsr305-3.0.0.jar
jta/1.1//jta-1.1.jar
jul-to-slf4j/2.0.16//jul-to-slf4j-2.0.16.jar
kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar
kubernetes-client-api/7.0.0//kubernetes-client-api-7.0.0.jar
kubernetes-client/7.0.0//kubernetes-client-7.0.0.jar
kubernetes-httpclient-vertx/7.0.0//kubernetes-httpclient-vertx-7.0.0.jar
kubernetes-model-admissionregistration/7.0.0//kubernetes-model-admissionregistration-7.0.0.jar
kubernetes-model-apiextensions/7.0.0//kubernetes-model-apiextensions-7.0.0.jar
kubernetes-model-apps/7.0.0//kubernetes-model-apps-7.0.0.jar
kubernetes-model-autoscaling/7.0.0//kubernetes-model-autoscaling-7.0.0.jar
kubernetes-model-batch/7.0.0//kubernetes-model-batch-7.0.0.jar
kubernetes-model-certificates/7.0.0//kubernetes-model-certificates-7.0.0.jar
kubernetes-model-common/7.0.0//kubernetes-model-common-7.0.0.jar
kubernetes-model-coordination/7.0.0//kubernetes-model-coordination-7.0.0.jar
kubernetes-model-core/7.0.0//kubernetes-model-core-7.0.0.jar
kubernetes-model-discovery/7.0.0//kubernetes-model-discovery-7.0.0.jar
kubernetes-model-events/7.0.0//kubernetes-model-events-7.0.0.jar
kubernetes-model-extensions/7.0.0//kubernetes-model-extensions-7.0.0.jar
kubernetes-model-flowcontrol/7.0.0//kubernetes-model-flowcontrol-7.0.0.jar
kubernetes-model-gatewayapi/7.0.0//kubernetes-model-gatewayapi-7.0.0.jar
kubernetes-model-metrics/7.0.0//kubernetes-model-metrics-7.0.0.jar
kubernetes-model-networking/7.0.0//kubernetes-model-networking-7.0.0.jar
kubernetes-model-node/7.0.0//kubernetes-model-node-7.0.0.jar
kubernetes-model-policy/7.0.0//kubernetes-model-policy-7.0.0.jar
kubernetes-model-rbac/7.0.0//kubernetes-model-rbac-7.0.0.jar
kubernetes-model-resource/7.0.0//kubernetes-model-resource-7.0.0.jar
kubernetes-model-scheduling/7.0.0//kubernetes-model-scheduling-7.0.0.jar
kubernetes-model-storageclass/7.0.0//kubernetes-model-storageclass-7.0.0.jar
kubernetes-client-api/7.0.1//kubernetes-client-api-7.0.1.jar
kubernetes-client/7.0.1//kubernetes-client-7.0.1.jar
kubernetes-httpclient-vertx/7.0.1//kubernetes-httpclient-vertx-7.0.1.jar
kubernetes-model-admissionregistration/7.0.1//kubernetes-model-admissionregistration-7.0.1.jar
kubernetes-model-apiextensions/7.0.1//kubernetes-model-apiextensions-7.0.1.jar
kubernetes-model-apps/7.0.1//kubernetes-model-apps-7.0.1.jar
kubernetes-model-autoscaling/7.0.1//kubernetes-model-autoscaling-7.0.1.jar
kubernetes-model-batch/7.0.1//kubernetes-model-batch-7.0.1.jar
kubernetes-model-certificates/7.0.1//kubernetes-model-certificates-7.0.1.jar
kubernetes-model-common/7.0.1//kubernetes-model-common-7.0.1.jar
kubernetes-model-coordination/7.0.1//kubernetes-model-coordination-7.0.1.jar
kubernetes-model-core/7.0.1//kubernetes-model-core-7.0.1.jar
kubernetes-model-discovery/7.0.1//kubernetes-model-discovery-7.0.1.jar
kubernetes-model-events/7.0.1//kubernetes-model-events-7.0.1.jar
kubernetes-model-extensions/7.0.1//kubernetes-model-extensions-7.0.1.jar
kubernetes-model-flowcontrol/7.0.1//kubernetes-model-flowcontrol-7.0.1.jar
kubernetes-model-gatewayapi/7.0.1//kubernetes-model-gatewayapi-7.0.1.jar
kubernetes-model-metrics/7.0.1//kubernetes-model-metrics-7.0.1.jar
kubernetes-model-networking/7.0.1//kubernetes-model-networking-7.0.1.jar
kubernetes-model-node/7.0.1//kubernetes-model-node-7.0.1.jar
kubernetes-model-policy/7.0.1//kubernetes-model-policy-7.0.1.jar
kubernetes-model-rbac/7.0.1//kubernetes-model-rbac-7.0.1.jar
kubernetes-model-resource/7.0.1//kubernetes-model-resource-7.0.1.jar
kubernetes-model-scheduling/7.0.1//kubernetes-model-scheduling-7.0.1.jar
kubernetes-model-storageclass/7.0.1//kubernetes-model-storageclass-7.0.1.jar
lapack/3.0.3//lapack-3.0.3.jar
leveldbjni-all/1.8//leveldbjni-all-1.8.jar
libfb303/0.9.3//libfb303-0.9.3.jar
libthrift/0.16.0//libthrift-0.16.0.jar
listenablefuture/9999.0-empty-to-avoid-conflict-with-guava//listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar
log4j-1.2-api/2.24.2//log4j-1.2-api-2.24.2.jar
log4j-api/2.24.2//log4j-api-2.24.2.jar
log4j-core/2.24.2//log4j-core-2.24.2.jar
log4j-layout-template-json/2.24.2//log4j-layout-template-json-2.24.2.jar
log4j-slf4j2-impl/2.24.2//log4j-slf4j2-impl-2.24.2.jar
log4j-1.2-api/2.24.3//log4j-1.2-api-2.24.3.jar
log4j-api/2.24.3//log4j-api-2.24.3.jar
log4j-core/2.24.3//log4j-core-2.24.3.jar
log4j-layout-template-json/2.24.3//log4j-layout-template-json-2.24.3.jar
log4j-slf4j2-impl/2.24.3//log4j-slf4j2-impl-2.24.3.jar
lz4-java/1.8.0//lz4-java-1.8.0.jar
metrics-core/4.2.29//metrics-core-4.2.29.jar
metrics-graphite/4.2.29//metrics-graphite-4.2.29.jar
@@ -281,7 +281,7 @@ wildfly-openssl/1.1.3.Final//wildfly-openssl-1.1.3.Final.jar
xbean-asm9-shaded/4.26//xbean-asm9-shaded-4.26.jar
xmlschema-core/2.3.1//xmlschema-core-2.3.1.jar
xz/1.10//xz-1.10.jar
zjsonpatch/7.0.0//zjsonpatch-7.0.0.jar
zjsonpatch/7.0.1//zjsonpatch-7.0.1.jar
zookeeper-jute/3.9.3//zookeeper-jute-3.9.3.jar
zookeeper/3.9.3//zookeeper-3.9.3.jar
zstd-jni/1.5.6-8//zstd-jni-1.5.6-8.jar
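
Note: this manifest is generated rather than hand-edited; after a dependency bump in pom.xml it is normally refreshed with the repository's helper script (a sketch, assuming the script behaves as in recent Spark branches):

    # Regenerate dev/deps/spark-deps-hadoop-3-hive-2.3 from the current pom.xml
    ./dev/test-dependencies.sh --replace-manifest
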
30 changes: 16 additions & 14 deletions dev/spark-test-image-util/docs/build-docs
@@ -49,20 +49,22 @@ docker run \
--interactive --tty "${IMG_URL}" \
/bin/bash -c "sh ${BUILD_DOCS_SCRIPT_PATH}"

# 4.Build docs on host: `r doc`.
#
# Why does `r` document need to be compiled outside the container?
# Because when compiling inside the container, the permission of the directory
# `/__w/spark/spark/R/pkg/docs` automatically generated by `RScript` is `dr-xr--r-x`,
# and when writing to subsequent files, will throw an error as:
# `! [EACCES] Failed to copy '/usr/local/lib/R/site-library/pkgdown/BS5/assets/katex-auto.js'
# to '/__w/spark/spark/R/pkg/docs/katex-auto.js': permission denied`
export SKIP_ERRORDOC=1
export SKIP_SCALADOC=1
export SKIP_PYTHONDOC=1
export SKIP_SQLDOC=1
cd docs
bundle exec jekyll build
if [[ "$SKIP_RDOC" != "1" ]]; then
# 4.Build docs on host: `r doc`.
#
# Why does `r` document need to be compiled outside the container?
# Because when compiling inside the container, the permission of the directory
# `/__w/spark/spark/R/pkg/docs` automatically generated by `RScript` is `dr-xr--r-x`,
# and when writing to subsequent files, will throw an error as:
# `! [EACCES] Failed to copy '/usr/local/lib/R/site-library/pkgdown/BS5/assets/katex-auto.js'
# to '/__w/spark/spark/R/pkg/docs/katex-auto.js': permission denied`
export SKIP_ERRORDOC=1
export SKIP_SCALADOC=1
export SKIP_PYTHONDOC=1
export SKIP_SQLDOC=1
cd docs
bundle exec jekyll build
fi

# 5.Remove container image.
IMG_ID=$(docker images | grep "${IMG_TAG}" | awk '{print $3}')
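
Note: with the new SKIP_RDOC guard, the host-side step can be bypassed entirely. A hypothetical invocation from the repository root:

    # Build docs inside the container only, skipping the host-side `r doc` / Jekyll pass
    SKIP_RDOC=1 dev/spark-test-image-util/docs/build-docs
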
2 changes: 1 addition & 1 deletion dev/spark-test-image/docs/Dockerfile
@@ -86,6 +86,6 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \
&& python3.9 -m pip cache purge
4 changes: 2 additions & 2 deletions pom.xml
@@ -121,7 +121,7 @@
<sbt.project.name>spark</sbt.project.name>
<asm.version>9.7.1</asm.version>
<slf4j.version>2.0.16</slf4j.version>
<log4j.version>2.24.2</log4j.version>
<log4j.version>2.24.3</log4j.version>
<!-- make sure to update IsolatedClientLoader whenever this version is changed -->
<hadoop.version>3.4.1</hadoop.version>
<!-- SPARK-41247: When updating `protobuf.version`, also need to update `protoVersion` in `SparkBuild.scala` -->
@@ -235,7 +235,7 @@

<!-- org.fusesource.leveldbjni will be used except on arm64 platform. -->
<leveldbjni.group>org.fusesource.leveldbjni</leveldbjni.group>
<kubernetes-client.version>7.0.0</kubernetes-client.version>
<kubernetes-client.version>7.0.1</kubernetes-client.version>

<test.java.home>${java.home}</test.java.home>

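
Note: a quick way to confirm that the property bumps are picked up in the effective POM, sketched with the Maven help plugin (the -DforceStdout flag assumes a reasonably recent maven-help-plugin):

    ./build/mvn -q help:evaluate -Dexpression=kubernetes-client.version -DforceStdout
    ./build/mvn -q help:evaluate -Dexpression=log4j.version -DforceStdout
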
2 changes: 1 addition & 1 deletion python/docs/source/development/debugging.rst
@@ -669,7 +669,7 @@ Stack Traces
There are Spark configurations to control stack traces:
- ``spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled`` is true by default to simplify traceback from Python UDFs.
- ``spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled`` is true by default to simplify traceback from Python UDFs and Data Sources.
- ``spark.sql.pyspark.jvmStacktrace.enabled`` is false by default to hide JVM stacktrace and to show a Python-friendly exception only.
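
Note: both configurations mentioned above can also be toggled when launching the shell, for example (a sketch using the standard --conf flag):

    ./bin/pyspark \
      --conf spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled=false \
      --conf spark.sql.pyspark.jvmStacktrace.enabled=true
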
5 changes: 0 additions & 5 deletions python/pyspark/errors/error-conditions.json
@@ -189,11 +189,6 @@
"Remote client cannot create a SparkContext. Create SparkSession instead."
]
},
"DATA_SOURCE_CREATE_ERROR": {
"message": [
"Failed to create python data source instance, error: <error>."
]
},
"DATA_SOURCE_INVALID_RETURN_TYPE": {
"message": [
"Unsupported return type ('<type>') from Python data source '<name>'. Expected types: <supported_types>."
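
Note: since the DATA_SOURCE_CREATE_ERROR condition is removed here, a quick sanity check that no references remain (a sketch):

    git grep -n "DATA_SOURCE_CREATE_ERROR" -- python/ || echo "no remaining references"
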
(Diffs for the remaining changed files are not shown.)
